diff --git a/.gitignore b/.gitignore
index 3fb3f81..ee1479b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,13 @@
 /Bin/
 *.opensdf
 *.bat
+*.zip
+.vs/*
+
+## Xcode
+*.pbxuser
+!default.pbxuser
+xcuserdata/
+*.xcuserdatad
+*.xccheckout
+build/
diff --git a/AdaptiveSolvers.sln b/AdaptiveSolvers.sln
new file mode 100644
index 0000000..3d77c31
--- /dev/null
+++ b/AdaptiveSolvers.sln
@@ -0,0 +1,136 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.26730.16
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ZLIB", "ZLIB.vcxproj", "{D3D173AB-D306-4179-BEC4-95CE1B14E647}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SSDRecon", "SSDRecon.vcxproj", "{477765F8-C16A-406B-807F-1302DAE74EBA}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PNG", "PNG.vcxproj", "{B5899B32-FAC2-477E-99AA-86736B97F2FC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JPEG", "JPEG.vcxproj", "{0BE55595-4080-4265-82AF-51108EC956B2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ImageStitching", "ImageStitching.vcxproj", "{E09D1615-1036-4F43-909A-2E751FC947F6}"
+	ProjectSection(ProjectDependencies) = postProject
+		{B5899B32-FAC2-477E-99AA-86736B97F2FC} = {B5899B32-FAC2-477E-99AA-86736B97F2FC}
+		{0BE55595-4080-4265-82AF-51108EC956B2} = {0BE55595-4080-4265-82AF-51108EC956B2}
+		{D3D173AB-D306-4179-BEC4-95CE1B14E647} = {D3D173AB-D306-4179-BEC4-95CE1B14E647}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "EDTInHeat", "EDTInHeat.vcxproj", "{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SurfaceTrimmer", "SurfaceTrimmer.vcxproj", "{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PoissonRecon", "PoissonRecon.vcxproj", "{46F87D0E-C53A-4F95-AB48-A5DBA8014340}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AdaptiveTreeVisualization", "AdaptiveTreeVisualization.vcxproj", "{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Header Files", "Header Files", "{C83FBEC5-FED8-4C72-AF1B-FCBA6E8640AB}"
+	ProjectSection(SolutionItems) = preProject
+		Src\Allocator.h = Src\Allocator.h
+		Src\Array.h = Src\Array.h
+		Src\BinaryNode.h = Src\BinaryNode.h
+		Src\BSplineData.h = Src\BSplineData.h
+		Src\CmdLineParser.h = Src\CmdLineParser.h
+		Src\Factor.h = Src\Factor.h
+		Src\FEMTree.h = Src\FEMTree.h
+		Src\FunctionData.h = Src\FunctionData.h
+		Src\Geometry.h = Src\Geometry.h
+		Src\Image.h = Src\Image.h
+		Src\JPEG.h = Src\JPEG.h
+		Src\MarchingCubes.h = Src\MarchingCubes.h
+		Src\MAT.h = Src\MAT.h
+		Src\MyMiscellany.h = Src\MyMiscellany.h
+		Src\Ply.h = Src\Ply.h
+		Src\PlyFile.h = Src\PlyFile.h
+		Src\PNG.h = Src\PNG.h
+		Src\PointStream.h = Src\PointStream.h
+		Src\PointStreamData.h = Src\PointStreamData.h
+		Src\Polynomial.h = Src\Polynomial.h
+		Src\PPolynomial.h = Src\PPolynomial.h
+		Src\RegularTree.h = Src\RegularTree.h
+		Src\SparseMatrix.h = Src\SparseMatrix.h
+		Src\SparseMatrixInterface.h = Src\SparseMatrixInterface.h
+		Src\Window.h = Src\Window.h
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Inline Files", "Inline Files", "{ACD217C3-BE24-4438-A4E6-C713312FEA34}"
+	ProjectSection(SolutionItems) = preProject
+		Src\Array.inl = Src\Array.inl
+		Src\BMPStream.inl = Src\BMPStream.inl
+		Src\BSplineData.inl = Src\BSplineData.inl
+		Src\CmdLineParser.inl = Src\CmdLineParser.inl
+		Src\FEMTree.Evaluation.inl = Src\FEMTree.Evaluation.inl
+		Src\FEMTree.Initialize.inl = Src\FEMTree.Initialize.inl
+		Src\FEMTree.inl = Src\FEMTree.inl
+		Src\FEMTree.IsoSurface.specialized.inl = Src\FEMTree.IsoSurface.specialized.inl
+		Src\FEMTree.SortedTreeNodes.inl = Src\FEMTree.SortedTreeNodes.inl
+		Src\FEMTree.System.inl = Src\FEMTree.System.inl
+		Src\FEMTree.WeightedSamples.inl = Src\FEMTree.WeightedSamples.inl
+		Src\FunctionData.inl = Src\FunctionData.inl
+		Src\Geometry.inl = Src\Geometry.inl
+		Src\JPEG.inl = Src\JPEG.inl
+		Src\MAT.inl = Src\MAT.inl
+		Src\PNG.inl = Src\PNG.inl
+		Src\PointStream.inl = Src\PointStream.inl
+		Src\Polynomial.inl = Src\Polynomial.inl
+		Src\PPolynomial.inl = Src\PPolynomial.inl
+		Src\RegularTree.inl = Src\RegularTree.inl
+		Src\SparseMatrix.inl = Src\SparseMatrix.inl
+		Src\SparseMatrixInterface.inl = Src\SparseMatrixInterface.inl
+		Src\Window.inl = Src\Window.inl
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{D3D173AB-D306-4179-BEC4-95CE1B14E647}.Release|x64.ActiveCfg = Release|x64
+		{D3D173AB-D306-4179-BEC4-95CE1B14E647}.Release|x64.Build.0 = Release|x64
+		{D3D173AB-D306-4179-BEC4-95CE1B14E647}.Release|x86.ActiveCfg = Release|Win32
+		{D3D173AB-D306-4179-BEC4-95CE1B14E647}.Release|x86.Build.0 = Release|Win32
+		{477765F8-C16A-406B-807F-1302DAE74EBA}.Release|x64.ActiveCfg = Release|x64
+		{477765F8-C16A-406B-807F-1302DAE74EBA}.Release|x64.Build.0 = Release|x64
+		{477765F8-C16A-406B-807F-1302DAE74EBA}.Release|x86.ActiveCfg = Release|Win32
+		{477765F8-C16A-406B-807F-1302DAE74EBA}.Release|x86.Build.0 = Release|Win32
+		{B5899B32-FAC2-477E-99AA-86736B97F2FC}.Release|x64.ActiveCfg = Release|x64
+		{B5899B32-FAC2-477E-99AA-86736B97F2FC}.Release|x64.Build.0 = Release|x64
+		{B5899B32-FAC2-477E-99AA-86736B97F2FC}.Release|x86.ActiveCfg = Release|Win32
+		{B5899B32-FAC2-477E-99AA-86736B97F2FC}.Release|x86.Build.0 = Release|Win32
+		{0BE55595-4080-4265-82AF-51108EC956B2}.Release|x64.ActiveCfg = Release|x64
+		{0BE55595-4080-4265-82AF-51108EC956B2}.Release|x64.Build.0 = Release|x64
+		{0BE55595-4080-4265-82AF-51108EC956B2}.Release|x86.ActiveCfg = Release|Win32
+		{0BE55595-4080-4265-82AF-51108EC956B2}.Release|x86.Build.0 = Release|Win32
+		{E09D1615-1036-4F43-909A-2E751FC947F6}.Release|x64.ActiveCfg = Release|x64
+		{E09D1615-1036-4F43-909A-2E751FC947F6}.Release|x64.Build.0 = Release|x64
+		{E09D1615-1036-4F43-909A-2E751FC947F6}.Release|x86.ActiveCfg = Release|Win32
+		{E09D1615-1036-4F43-909A-2E751FC947F6}.Release|x86.Build.0 = Release|Win32
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x64.ActiveCfg = Release|x64
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x64.Build.0 = Release|x64
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x64.Deploy.0 = Release|x64
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x86.ActiveCfg = Release|Win32
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x86.Build.0 = Release|Win32
+		{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}.Release|x86.Deploy.0 = Release|Win32
+		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x64.ActiveCfg = Release|x64
+		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x64.Build.0 = Release|x64
+		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x86.ActiveCfg = Release|Win32
+		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x86.Build.0 = Release|Win32
+		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x64.ActiveCfg = Release|x64
+		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x64.Build.0 = Release|x64
+		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x86.ActiveCfg = Release|Win32
+		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x86.Build.0 = Release|Win32
+		{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}.Release|x64.ActiveCfg = Release|x64
+		{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}.Release|x64.Build.0 = Release|x64
+		{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}.Release|x86.ActiveCfg = Release|Win32
+		{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {F963AD25-6C34-4074-9B17-605D19CC6165}
+	EndGlobalSection
+EndGlobal
diff --git a/AdaptiveTreeVisualization.vcxproj b/AdaptiveTreeVisualization.vcxproj
new file mode 100644
index 0000000..4300eb7
--- /dev/null
+++ b/AdaptiveTreeVisualization.vcxproj
@@ -0,0 +1,159 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="Src\AdaptiveTreeVisualization.cpp" />
+    <ClCompile Include="Src\PlyFile.cpp" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{742064B3-CEBB-4AF8-B43C-D213EB1C6D7A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>AdaptiveTreeVisualization</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <OpenMPSupport>true</OpenMPSupport>
+      <AdditionalOptions>
+      </AdditionalOptions>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>ZLIB.lib;JPEG.lib;PNG.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/EDTInHeat.vcxproj b/EDTInHeat.vcxproj
new file mode 100644
index 0000000..f73f4c3
--- /dev/null
+++ b/EDTInHeat.vcxproj
@@ -0,0 +1,157 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="Src\EDTInHeat.cpp" />
+    <ClCompile Include="Src\PlyFile.cpp" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{3B94F9AD-35E8-4E1E-B176-AAA091E4C3CC}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>EDTInHeat</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <OpenMPSupport>true</OpenMPSupport>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+      <AdditionalOptions>
+      </AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/ImageStitching.vcxproj b/ImageStitching.vcxproj
new file mode 100644
index 0000000..6a2720d
--- /dev/null
+++ b/ImageStitching.vcxproj
@@ -0,0 +1,157 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="Src\ImageStitching.cpp" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{E09D1615-1036-4F43-909A-2E751FC947F6}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>ImageStitching</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+      <OpenMPSupport>true</OpenMPSupport>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>ZLIB.lib;JPEG.lib;PNG.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/JPEG.vcxproj b/JPEG.vcxproj
new file mode 100644
index 0000000..3cc7856
--- /dev/null
+++ b/JPEG.vcxproj
@@ -0,0 +1,201 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="JPEG\ckconfig.cpp" />
+    <ClCompile Include="JPEG\jcapimin.cpp" />
+    <ClCompile Include="JPEG\jcapistd.cpp" />
+    <ClCompile Include="JPEG\jccoefct.cpp" />
+    <ClCompile Include="JPEG\jccolor.cpp" />
+    <ClCompile Include="JPEG\jcdctmgr.cpp" />
+    <ClCompile Include="JPEG\jchuff.cpp" />
+    <ClCompile Include="JPEG\jcinit.cpp" />
+    <ClCompile Include="JPEG\jcmainct.cpp" />
+    <ClCompile Include="JPEG\jcmarker.cpp" />
+    <ClCompile Include="JPEG\jcmaster.cpp" />
+    <ClCompile Include="JPEG\jcomapi.cpp" />
+    <ClCompile Include="JPEG\jcparam.cpp" />
+    <ClCompile Include="JPEG\jcphuff.cpp" />
+    <ClCompile Include="JPEG\jcprepct.cpp" />
+    <ClCompile Include="JPEG\jcsample.cpp" />
+    <ClCompile Include="JPEG\jctrans.cpp" />
+    <ClCompile Include="JPEG\jdapimin.cpp" />
+    <ClCompile Include="JPEG\jdapistd.cpp" />
+    <ClCompile Include="JPEG\jdatadst.cpp" />
+    <ClCompile Include="JPEG\jdatasrc.cpp" />
+    <ClCompile Include="JPEG\jdcoefct.cpp" />
+    <ClCompile Include="JPEG\jdcolor.cpp" />
+    <ClCompile Include="JPEG\jddctmgr.cpp" />
+    <ClCompile Include="JPEG\jdhuff.cpp" />
+    <ClCompile Include="JPEG\jdinput.cpp" />
+    <ClCompile Include="JPEG\jdmainct.cpp" />
+    <ClCompile Include="JPEG\jdmarker.cpp" />
+    <ClCompile Include="JPEG\jdmaster.cpp" />
+    <ClCompile Include="JPEG\jdmerge.cpp" />
+    <ClCompile Include="JPEG\jdphuff.cpp" />
+    <ClCompile Include="JPEG\jdpostct.cpp" />
+    <ClCompile Include="JPEG\jdsample.cpp" />
+    <ClCompile Include="JPEG\jdtrans.cpp" />
+    <ClCompile Include="JPEG\jerror.cpp" />
+    <ClCompile Include="JPEG\jfdctflt.cpp" />
+    <ClCompile Include="JPEG\jfdctfst.cpp" />
+    <ClCompile Include="JPEG\jfdctint.cpp" />
+    <ClCompile Include="JPEG\jidctflt.cpp" />
+    <ClCompile Include="JPEG\jidctfst.cpp" />
+    <ClCompile Include="JPEG\jidctint.cpp" />
+    <ClCompile Include="JPEG\jidctred.cpp" />
+    <ClCompile Include="JPEG\jmemmgr.cpp" />
+    <ClCompile Include="JPEG\jmemnobs.cpp" />
+    <ClCompile Include="JPEG\jquant1.cpp" />
+    <ClCompile Include="JPEG\jquant2.cpp" />
+    <ClCompile Include="JPEG\jutils.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="JPEG\jchuff.h" />
+    <ClInclude Include="JPEG\jconfig.h" />
+    <ClInclude Include="JPEG\jdct.h" />
+    <ClInclude Include="JPEG\jdhuff.h" />
+    <ClInclude Include="JPEG\jerror.h" />
+    <ClInclude Include="JPEG\jinclude.h" />
+    <ClInclude Include="JPEG\jmemsys.h" />
+    <ClInclude Include="JPEG\jmorecfg.h" />
+    <ClInclude Include="JPEG\jpegint.h" />
+    <ClInclude Include="JPEG\jpeglib.h" />
+    <ClInclude Include="JPEG\jversion.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{0BE55595-4080-4265-82AF-51108EC956B2}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>JPEG</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>TurnOffAllWarnings</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/JPEG/Makefile b/JPEG/Makefile
new file mode 100644
index 0000000..413ebaf
--- /dev/null
+++ b/JPEG/Makefile
@@ -0,0 +1,67 @@
+# Library sources only: ckconfig.cpp is a stand-alone probe with its own main().
+JPG_TARGET=libmyjpg.a
+JPG_SOURCE=jccolor.cpp jcmainct.cpp jcparam.cpp jctrans.cpp jdatasrc.cpp jdhuff.cpp jdmaster.cpp jdsample.cpp jfdctfst.cpp jidctint.cpp jquant1.cpp jcapimin.cpp jcdctmgr.cpp jcmarker.cpp jcphuff.cpp jdapimin.cpp jdcoefct.cpp jdinput.cpp jdmerge.cpp jdtrans.cpp jfdctint.cpp jidctred.cpp jquant2.cpp jcapistd.cpp jchuff.cpp jcmaster.cpp jcprepct.cpp jdapistd.cpp jdcolor.cpp jdmainct.cpp jdphuff.cpp jerror.cpp jidctflt.cpp jmemmgr.cpp jutils.cpp jccoefct.cpp jcinit.cpp jcomapi.cpp jcsample.cpp jdatadst.cpp jddctmgr.cpp jdmarker.cpp jdpostct.cpp jfdctflt.cpp jidctfst.cpp jmemnobs.cpp
+COMPILER = gcc
+#COMPILER = clang
+
+CFLAGS += -Wno-deprecated -Wno-write-strings
+
+CFLAGS_DEBUG = -DDEBUG -g3
+LFLAGS_DEBUG =
+
+CFLAGS_RELEASE = -O3 -DRELEASE -funroll-loops -ffast-math -g
+LFLAGS_RELEASE = -O3 -g
+
+SRC = ./
+BIN = ../Bin/Linux/
+INCLUDE = ../
+
+ifeq ($(COMPILER),gcc)
+	CC=gcc
+	CXX=g++
+else
+	CC=clang-3.8
+	CXX=clang++-3.8
+#	CC=clang-3.5
+#	CXX=clang++-3.5
+endif
+
+MD=mkdir
+
+JPG_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(JPG_SOURCE))))
+
+all: CFLAGS += $(CFLAGS_RELEASE)
+all: LFLAGS += $(LFLAGS_RELEASE)
+all: make_dir
+all: $(BIN)
+all: $(BIN)$(JPG_TARGET)
+
+debug: CFLAGS += $(CFLAGS_DEBUG)
+debug: LFLAGS += $(LFLAGS_DEBUG)
+debug: $(BIN)
+debug: $(BIN)$(JPG_TARGET)
+
+jpeg: CFLAGS += $(CFLAGS_RELEASE)
+jpeg: LFLAGS += $(LFLAGS_RELEASE)
+jpeg: $(BIN)
+jpeg: $(BIN)$(JPG_TARGET)
+
+clean:
+	rm -rf $(BIN)$(JPG_TARGET)
+	rm -rf $(JPG_OBJECTS)
+
+make_dir:
+	$(MD) -p $(BIN)
+
+$(BIN):
+	$(MD) -p $(BIN)
+
+$(BIN)$(JPG_TARGET): $(JPG_OBJECTS)
+	ar rcs $(BIN)$(JPG_TARGET) $(JPG_OBJECTS)
+
+$(BIN)%.o: $(SRC)%.c
+	$(CC) -c -o $@ -I$(INCLUDE) $<
+# C++ sources are built with the C++ driver selected above (g++ or clang++).
+$(BIN)%.o: $(SRC)%.cpp
+	$(CXX) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
+
diff --git a/JPEG/ckconfig.cpp b/JPEG/ckconfig.cpp
new file mode 100644
index 0000000..34baf79
--- /dev/null
+++ b/JPEG/ckconfig.cpp
@@ -0,0 +1,402 @@
+/*
+ * ckconfig.c
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ */
+
+/*
+ * This program is intended to help you determine how to configure the JPEG
+ * software for installation on a particular system.  The idea is to try to
+ * compile and execute this program.  If your compiler fails to compile the
+ * program, make changes as indicated in the comments below.  Once you can
+ * compile the program, run it, and it will produce a "jconfig.h" file for
+ * your system.
+ *
+ * As a general rule, each time you try to compile this program,
+ * pay attention only to the *first* error message you get from the compiler.
+ * Many C compilers will issue lots of spurious error messages once they
+ * have gotten confused.  Go to the line indicated in the first error message,
+ * and read the comments preceding that line to see what to change.
+ *
+ * Almost all of the edits you may need to make to this program consist of
+ * changing a line that reads "#define SOME_SYMBOL" to "#undef SOME_SYMBOL",
+ * or vice versa.  This is called defining or undefining that symbol.
+ */
+
+
+/* First we must see if your system has the include files we need.
+ * We start out with the assumption that your system has all the ANSI-standard
+ * include files.  If you get any error trying to include one of these files,
+ * undefine the corresponding HAVE_xxx symbol.
+ */
+
+#define HAVE_STDDEF_H		/* replace 'define' by 'undef' if error here */
+#ifdef HAVE_STDDEF_H		/* next line will be skipped if you undef... */
+#include <stddef.h>
+#endif
+
+#define HAVE_STDLIB_H		/* same thing for stdlib.h */
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#include <stdio.h>		/* If you ain't got this, you ain't got C. */
+
+/* We have to see if your string functions are defined by
+ * strings.h (old BSD convention) or string.h (everybody else).
+ * We try the non-BSD convention first; define NEED_BSD_STRINGS
+ * if the compiler says it can't find string.h.
+ */
+
+#undef NEED_BSD_STRINGS
+
+#ifdef NEED_BSD_STRINGS
+#include <strings.h>
+#else
+#include <string.h>
+#endif
+
+/* On some systems (especially older Unix machines), type size_t is
+ * defined only in the include file <sys/types.h>.  If you get a failure
+ * on the size_t test below, try defining NEED_SYS_TYPES_H.
+ */
+
+#undef NEED_SYS_TYPES_H		/* start by assuming we don't need it */
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+
+/* Usually type size_t is defined in one of the include files we've included
+ * above.  If not, you'll get an error on the "typedef size_t my_size_t;" line.
+ * In that case, first try defining NEED_SYS_TYPES_H just above.
+ * If that doesn't work, you'll have to search through your system library
+ * to figure out which include file defines "size_t".  Look for a line that
+ * says "typedef something-or-other size_t;".  Then, change the line below
+ * that says "#include <someincludefile.h>" to instead include the file
+ * you found size_t in, and define NEED_SPECIAL_INCLUDE.  If you can't find
+ * type size_t anywhere, try replacing "#include <someincludefile.h>" with
+ * "typedef unsigned int size_t;".
+ */
+
+#undef NEED_SPECIAL_INCLUDE	/* assume we DON'T need it, for starters */
+
+#ifdef NEED_SPECIAL_INCLUDE
+#include <someincludefile.h>
+#endif
+
+typedef size_t my_size_t;	/* The payoff: do we have size_t now? */
+
+
+/* The next question is whether your compiler supports ANSI-style function
+ * prototypes.  You need to know this in order to choose between using
+ * makefile.ansi and using makefile.unix.
+ * The #define line below is set to assume you have ANSI function prototypes.
+ * If you get an error in this group of lines, undefine HAVE_PROTOTYPES.
+ */
+
+#define HAVE_PROTOTYPES
+
+#ifdef HAVE_PROTOTYPES
+int testfunction (int arg1, int * arg2); /* check prototypes */
+
+struct methods_struct {		/* check method-pointer declarations */
+  int (*error_exit) (char *msgtext);
+  int (*trace_message) (char *msgtext);
+  int (*another_method) (void);
+};
+
+int testfunction (int arg1, int * arg2) /* check definitions */
+{
+  return arg2[arg1];
+}
+
+int test2function (void)	/* check void arg list */
+{
+  return 0;
+}
+#endif
+
+
+/* Now we want to find out if your compiler knows what "unsigned char" means.
+ * If you get an error on the "unsigned char un_char;" line,
+ * then undefine HAVE_UNSIGNED_CHAR.
+ */
+
+#define HAVE_UNSIGNED_CHAR
+
+#ifdef HAVE_UNSIGNED_CHAR
+unsigned char un_char;
+#endif
+
+
+/* Now we want to find out if your compiler knows what "unsigned short" means.
+ * If you get an error on the "unsigned short un_short;" line,
+ * then undefine HAVE_UNSIGNED_SHORT.
+ */
+
+#define HAVE_UNSIGNED_SHORT
+
+#ifdef HAVE_UNSIGNED_SHORT
+unsigned short un_short;
+#endif
+
+
+/* Now we want to find out if your compiler understands type "void".
+ * If you get an error anywhere in here, undefine HAVE_VOID.
+ */
+
+#define HAVE_VOID
+
+#ifdef HAVE_VOID
+/* Caution: a C++ compiler will insist on complete prototypes */
+typedef void * void_ptr;	/* check void * */
+#ifdef HAVE_PROTOTYPES		/* check ptr to function returning void */
+typedef void (*void_func) (int a, int b);
+#else
+typedef void (*void_func) ();
+#endif
+
+#ifdef HAVE_PROTOTYPES		/* check void function result */
+void test3function (void_ptr arg1, void_func arg2)
+#else
+void test3function (arg1, arg2)
+     void_ptr arg1;
+     void_func arg2;
+#endif
+{
+  char * locptr = (char *) arg1; /* check casting to and from void * */
+  arg1 = (void *) locptr;
+  (*arg2) (1, 2);		/* check call of fcn returning void */
+}
+#endif
+
+
+/* Now we want to find out if your compiler knows what "const" means.
+ * If you get an error here, undefine HAVE_CONST.
+ */
+
+#define HAVE_CONST
+
+#ifdef HAVE_CONST
+static const int carray[3] = {1, 2, 3};
+
+#ifdef HAVE_PROTOTYPES
+int test4function (const int arg1)
+#else
+int test4function (arg1)
+     const int arg1;
+#endif
+{
+  return carray[arg1];
+}
+#endif
+
+
+/* If you get an error or warning about this structure definition,
+ * define INCOMPLETE_TYPES_BROKEN.
+ */
+
+#undef INCOMPLETE_TYPES_BROKEN
+
+#ifndef INCOMPLETE_TYPES_BROKEN
+typedef struct undefined_structure * undef_struct_ptr;
+#endif
+
+
+/* If you get an error about duplicate names,
+ * define NEED_SHORT_EXTERNAL_NAMES.
+ */
+
+#undef NEED_SHORT_EXTERNAL_NAMES
+
+#ifndef NEED_SHORT_EXTERNAL_NAMES
+
+int possibly_duplicate_function ()
+{
+  return 0;
+}
+
+int possibly_dupli_function ()
+{
+  return 1;
+}
+
+#endif
+
+
+
+/************************************************************************
+ *  OK, that's it.  You should not have to change anything beyond this
+ *  point in order to compile and execute this program.  (You might get
+ *  some warnings, but you can ignore them.)
+ *  When you run the program, it will make a couple more tests that it
+ *  can do automatically, and then it will create jconfig.h and print out
+ *  any additional suggestions it has.
+ ************************************************************************
+ */
+
+
+#ifdef HAVE_PROTOTYPES
+int is_char_signed (int arg)
+#else
+int is_char_signed (arg)
+     int arg;
+#endif
+{
+  if (arg == 189)		/* what (char) -67 widens to if char is unsigned */
+    return 0;			/* report: char is unsigned */
+
+  if (arg != -67) {		/* not the signed 8-bit value either */
+    printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n");
+    printf("I fear the JPEG software will not work at all.\n\n");
+  }
+  return 1;			/* every other case is treated as signed */
+}
+
+
+#ifdef HAVE_PROTOTYPES
+int is_shifting_signed (long arg)
+#else
+int is_shifting_signed (arg)
+     long arg;
+#endif
+/* Return 1 if >> on a negative long sign-extends, 0 if it zero-fills. */
+{
+  long res = arg >> 4;
+
+  if (res == -0x7F7E80CL) {	/* expected result for signed shift */
+    return 1;			/* right shift is signed */
+  }
+  /* Zero-fill shift?  OR the top bits back in; the mask depends on long's width. */
+  /* Build it in unsigned long: left-shifting the negative ~0L is undefined. */
+  res |= (long) (~0UL << (32-4));
+  if (res == -0x7F7E80CL) {	/* expected result now? */
+    return 0;			/* right shift is unsigned */
+  }
+  printf("Right shift isn't acting as I expect it to.\n");
+  printf("I fear the JPEG software will not work at all.\n\n");
+  return 0;			/* try it with unsigned anyway */
+}
+
+
+#ifdef HAVE_PROTOTYPES
+int main (int argc, char ** argv)
+#else
+int main (argc, argv)
+     int argc;
+     char ** argv;
+#endif
+{
+  char signed_char_check = (char) (-67);
+  FILE *outfile;
+
+  /* Create jconfig.h in the current directory; give up if it cannot be opened */
+  if ((outfile = fopen("jconfig.h", "w")) == NULL) {
+    printf("Failed to write jconfig.h\n");
+    return 1;
+  }
+
+  /* Emit one #define/#undef per symbol, mirroring the settings compiled above */
+  fprintf(outfile, "/* jconfig.h --- generated by ckconfig.c */\n");
+  fprintf(outfile, "/* see jconfig.doc for explanations */\n\n");
+#ifdef HAVE_PROTOTYPES
+  fprintf(outfile, "#define HAVE_PROTOTYPES\n");
+#else
+  fprintf(outfile, "#undef HAVE_PROTOTYPES\n");
+#endif
+#ifdef HAVE_UNSIGNED_CHAR
+  fprintf(outfile, "#define HAVE_UNSIGNED_CHAR\n");
+#else
+  fprintf(outfile, "#undef HAVE_UNSIGNED_CHAR\n");
+#endif
+#ifdef HAVE_UNSIGNED_SHORT
+  fprintf(outfile, "#define HAVE_UNSIGNED_SHORT\n");
+#else
+  fprintf(outfile, "#undef HAVE_UNSIGNED_SHORT\n");
+#endif
+#ifdef HAVE_VOID
+  fprintf(outfile, "/* #define void char */\n");
+#else
+  fprintf(outfile, "#define void char\n");
+#endif
+#ifdef HAVE_CONST
+  fprintf(outfile, "/* #define const */\n");
+#else
+  fprintf(outfile, "#define const\n");
+#endif
+  if (is_char_signed((int) signed_char_check))
+    fprintf(outfile, "#undef CHAR_IS_UNSIGNED\n");
+  else
+    fprintf(outfile, "#define CHAR_IS_UNSIGNED\n");
+#ifdef HAVE_STDDEF_H
+  fprintf(outfile, "#define HAVE_STDDEF_H\n");
+#else
+  fprintf(outfile, "#undef HAVE_STDDEF_H\n");
+#endif
+#ifdef HAVE_STDLIB_H
+  fprintf(outfile, "#define HAVE_STDLIB_H\n");
+#else
+  fprintf(outfile, "#undef HAVE_STDLIB_H\n");
+#endif
+#ifdef NEED_BSD_STRINGS
+  fprintf(outfile, "#define NEED_BSD_STRINGS\n");
+#else
+  fprintf(outfile, "#undef NEED_BSD_STRINGS\n");
+#endif
+#ifdef NEED_SYS_TYPES_H
+  fprintf(outfile, "#define NEED_SYS_TYPES_H\n");
+#else
+  fprintf(outfile, "#undef NEED_SYS_TYPES_H\n");
+#endif
+  fprintf(outfile, "#undef NEED_FAR_POINTERS\n");	/* always written as #undef */
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+  fprintf(outfile, "#define NEED_SHORT_EXTERNAL_NAMES\n");
+#else
+  fprintf(outfile, "#undef NEED_SHORT_EXTERNAL_NAMES\n");
+#endif
+#ifdef INCOMPLETE_TYPES_BROKEN
+  fprintf(outfile, "#define INCOMPLETE_TYPES_BROKEN\n");
+#else
+  fprintf(outfile, "#undef INCOMPLETE_TYPES_BROKEN\n");
+#endif
+  fprintf(outfile, "\n#ifdef JPEG_INTERNALS\n\n");
+  if (is_shifting_signed(-0x7F7E80B1L))	/* run-time probe of >> on a negative long */
+    fprintf(outfile, "#undef RIGHT_SHIFT_IS_UNSIGNED\n");
+  else
+    fprintf(outfile, "#define RIGHT_SHIFT_IS_UNSIGNED\n");
+  fprintf(outfile, "\n#endif /* JPEG_INTERNALS */\n");
+  fprintf(outfile, "\n#ifdef JPEG_CJPEG_DJPEG\n\n");
+  fprintf(outfile, "#define BMP_SUPPORTED		/* BMP image file format */\n");
+  fprintf(outfile, "#define GIF_SUPPORTED		/* GIF image file format */\n");
+  fprintf(outfile, "#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */\n");
+  fprintf(outfile, "#undef RLE_SUPPORTED		/* Utah RLE image file format */\n");
+  fprintf(outfile, "#define TARGA_SUPPORTED		/* Targa image file format */\n\n");
+  fprintf(outfile, "#undef TWO_FILE_COMMANDLINE	/* You may need this on non-Unix systems */\n");
+  fprintf(outfile, "#undef NEED_SIGNAL_CATCHER	/* Define this if you use jmemname.c */\n");
+  fprintf(outfile, "#undef DONT_USE_B_MODE\n");
+  fprintf(outfile, "/* #define PROGRESS_REPORT */	/* optional */\n");
+  fprintf(outfile, "\n#endif /* JPEG_CJPEG_DJPEG */\n");
+
+  /* Close the jconfig.h file.  NOTE(review): the fclose result is not checked */
+  fclose(outfile);
+
+  /* Tell the user what was written and which makefile to start from */
+  printf("Configuration check for Independent JPEG Group's software done.\n");
+  printf("\nI have written the jconfig.h file for you.\n\n");
+#ifdef HAVE_PROTOTYPES
+  printf("You should use makefile.ansi as the starting point for your Makefile.\n");
+#else
+  printf("You should use makefile.unix as the starting point for your Makefile.\n");
+#endif
+
+#ifdef NEED_SPECIAL_INCLUDE
+  printf("\nYou'll need to change jconfig.h to include the system include file\n");
+  printf("that you found type size_t in, or add a direct definition of type\n");
+  printf("size_t if that's what you used.  Just add it to the end.\n");
+#endif
+
+  return 0;
+}
diff --git a/JPEG/jcapimin.cpp b/JPEG/jcapimin.cpp
new file mode 100644
index 0000000..54fb8c5
--- /dev/null
+++ b/JPEG/jcapimin.cpp
@@ -0,0 +1,280 @@
+/*
+ * jcapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialize *cinfo as a compression object; version and structsize must
+ * match this library.  The error manager must already be set in cinfo->err.
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Reject a caller compiled against a different version or struct layout. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+
+  /* Zero the entire master structure (a debugging aid).  The application
+   * has already stored the err pointer, and possibly client_data, so both
+   * are saved beforehand and put back afterwards.
+   * Note: when client_data was never set, memory checkers such as Purify
+   * may complain about the read below.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Give this object its own memory manager instance */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* No permanent structures exist yet: clear every pointer to them. */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;	/* default, in case application forgets */
+
+  /* Object is now ready for use */
+  cinfo->global_state = CSTATE_START;
+}
+
+
+/*
+ * Destroy a JPEG compression object by delegating to jpeg_destroy.
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress (j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* common routine takes j_common_ptr */
+}
+
+
+/*
+ * Abort the JPEG compression operation in progress by delegating to
+ * jpeg_abort; the compression object itself is not destroyed.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress (j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* common routine takes j_common_ptr */
+}
+
+
+/*
+ * Store `suppress` into the sent_table flag of every quantization and Huffman
+ * table that is currently defined: TRUE marks them already written, FALSE not
+ * written, which controls whether a later jpeg_start_compress emits them.
+ *
+ * Exported for applications that produce abbreviated JPEG datastreams.  Kept
+ * here, not in jcparam.c, so using jpeg_start_compress doesn't link jcparam.o.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
+{
+  int slot;
+  JQUANT_TBL * quant;
+  JHUFF_TBL * huff;
+
+  for (slot = 0; slot < NUM_QUANT_TBLS; slot++) {
+    quant = cinfo->quant_tbl_ptrs[slot];
+    if (quant != NULL)
+      quant->sent_table = suppress;
+  }
+
+  for (slot = 0; slot < NUM_HUFF_TBLS; slot++) {
+    huff = cinfo->dc_huff_tbl_ptrs[slot];
+    if (huff != NULL)
+      huff->sent_table = suppress;
+    huff = cinfo->ac_huff_tbl_ptrs[slot];
+    if (huff != NULL)
+      huff->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression: close the current pass, run any passes that are
+ * still outstanding, write the file trailer and shut down the destination.
+ * All scanlines must already have been supplied.  In a multipass operating
+ * mode this may do a great deal of work, including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress (j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Every scanline must be in before the first pass can be closed */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Run the remaining passes, reporting progress per iMCU row if requested */
+  while (! cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) iMCU_row;
+	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Call the coefficient controller directly, bypassing the main
+       * controller: no new input is passed (NULL), all work is done from
+       * the coefficient buffer.  A false return raises JERR_CANT_SUSPEND. */
+      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
+	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Emit the file trailer (EOI), then let the destination finish up */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* jpeg_abort is reused here to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+}
+
+
+/*
+ * Emit one complete special marker: a header for `marker` announcing
+ * `datalen` bytes, followed by those bytes read from `dataptr`.
+ * Only recommended for COM or APPn markers.  Must be called after
+ * jpeg_start_compress() and before the first jpeg_write_scanlines()
+ * or jpeg_write_raw_data() call.
+ */
+
+GLOBAL(void)
+jpeg_write_marker (j_compress_ptr cinfo, int marker,
+		   const JOCTET *dataptr, unsigned int datalen)
+{
+  JMETHOD(void, emit_byte, (j_compress_ptr info, int val));
+
+  if (cinfo->next_scanline != 0 ||
+      ! (cinfo->global_state == CSTATE_SCANNING ||
+	 cinfo->global_state == CSTATE_RAW_OK ||
+	 cinfo->global_state == CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  /* Fetch the byte-writer method once rather than on every iteration. */
+  emit_byte = cinfo->marker->write_marker_byte;
+  for (; datalen > 0; datalen--)
+    (*emit_byte) (cinfo, *dataptr++);
+}
+
+/* Piecemeal form: validate state, then write just the marker header. */
+
+GLOBAL(void)
+jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||
+      ! (cinfo->global_state == CSTATE_SCANNING ||
+	 cinfo->global_state == CSTATE_RAW_OK ||
+	 cinfo->global_state == CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte (j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val);	/* forwards val; no state check */
+}
+
+
+/*
+ * Alternate compression entry point: write only an abbreviated table file.
+ * Requires CSTATE_START, with all parameters and a data destination set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *		initialize JPEG object
+ *		set JPEG parameters
+ *		set destination to table file
+ *		jpeg_write_tables(cinfo);
+ *		set destination to image file
+ *		jpeg_start_compress(cinfo, FALSE);
+ *		write data...
+ *		jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Reset the error manager, then (re)start the destination module */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* The marker writer is set up here, for want of a better place. */
+  jinit_marker_writer(cinfo);
+  /* Emit the tables-only datastream */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* Tell the destination module we are done */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, jpeg_abort() was called here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now nothing is freed.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/JPEG/jcapistd.cpp b/JPEG/jcapistd.cpp
new file mode 100644
index 0000000..c707cb0
--- /dev/null
+++ b/JPEG/jcapistd.cpp
@@ -0,0 +1,161 @@
+/*
+ * jcapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
+{
+	if (cinfo->global_state != CSTATE_START) {	/* only legal from the start state */
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	}
+	if (write_all_tables) {
+		jpeg_suppress_tables(cinfo, FALSE);	/* un-suppress: every table gets written */
+	}
+	/* Reset the error manager, then let the destination module initialize. */
+	(*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+	(*cinfo->dest->init_destination) (cinfo);
+	/* Master selection of active modules, then setup for the first pass. */
+	jinit_compress_master(cinfo);
+	(*cinfo->master->prepare_for_pass) (cinfo);
+	/* The application now drives pass one: scanlines or raw data. */
+	cinfo->next_scanline = 0;
+	if (cinfo->raw_data_in)
+		cinfo->global_state = CSTATE_RAW_OK;
+	else
+		cinfo->global_state = CSTATE_SCANNING;
+}
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
+		      JDIMENSION num_lines)
+{
+  JDIMENSION lines_done = 0;	/* filled in by process_data */
+  JDIMENSION lines_avail;	/* image rows not yet written */
+
+  /* Wrong state is an error; a call past end-of-image only warns. */
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+
+  /* Report scanlines-so-far against image height if a monitor is set. */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* The master may still owe its deferred pass startup: header output is
+   * held back until the first call here so that the application can emit
+   * COM and similar markers right after jpeg_start_compress.
+   */
+  if (cinfo->master->call_pass_startup) {
+    (*cinfo->master->pass_startup) (cinfo);
+  }
+
+  /* Silently drop rows that would fall below the bottom of the image. */
+  lines_avail = cinfo->image_height - cinfo->next_scanline;
+  if (lines_avail < num_lines)
+    num_lines = lines_avail;
+  (*cinfo->main->process_data) (cinfo, scanlines, &lines_done, num_lines);
+  cinfo->next_scanline += lines_done;
+  return lines_done;
+}
+
+
+/*
+ * Alternate entry point to write raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+		     JDIMENSION num_lines)
+{
+  JDIMENSION imcu_height;	/* scanlines covered by one iMCU row */
+
+  if (cinfo->global_state != CSTATE_RAW_OK) {	/* raw-data mode only */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Whole image already supplied: warn and consume nothing. */
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Report scanlines-so-far against image height if a monitor is set. */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* The master may still owe its deferred pass startup: header output is
+   * held back until the first call here so that the application can emit
+   * COM etc. markers between jpeg_start_compress and jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup) {
+    (*cinfo->master->pass_startup) (cinfo);
+  }
+
+  /* The caller must offer at least one full iMCU row of lines. */
+  imcu_height = cinfo->max_v_samp_factor * DCTSIZE;
+  if (num_lines < imcu_height) {
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  /* Compress the row; only a fully consumed row advances next_scanline. */
+  if ((*cinfo->coef->compress_data) (cinfo, data)) {
+    cinfo->next_scanline += imcu_height;
+    return imcu_height;
+  }
+  return 0;			/* compressor suspended: nothing consumed */
+}
diff --git a/JPEG/jccoefct.cpp b/JPEG/jccoefct.cpp
new file mode 100644
index 0000000..1963ddb
--- /dev/null
+++ b/JPEG/jccoefct.cpp
@@ -0,0 +1,449 @@
+/*
+ * jccoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED		/* Huffman optimization uses the full-image buffer */
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED	/* ... and so does writing multiple-scan files */
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields; first member, since cinfo->coef is cast to my_coef_ptr */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image; zeroed by start_pass_coef */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row; resume column after a suspension */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row; resume row after a suspension */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed; computed by start_iMCU_row */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).  We allocate a
+   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
+   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
+   * it's not really very big; this is to keep the module interfaces unchanged
+   * when a large coefficient buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* In multi-pass modes, one virtual block array per component; whole_image[0] == NULL flags "no virtual arrays". */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;	/* private view of cinfo->coef used throughout this file */
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));	/* installed by start_pass_coef for JBUF_PASS_THRU */
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));	/* installed for JBUF_SAVE_AND_PASS */
+METHODDEF(boolean) compress_output
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));	/* installed for JBUF_CRANK_DEST */
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Begin a new iMCU row: zero the resume counters and size the row */
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  int mcu_rows;
+
+  ctl->MCU_vert_offset = 0;
+  ctl->mcu_ctr = 0;
+
+  /* Interleaved scan: exactly one MCU row per iMCU row.
+   * Single-component scan: v_samp_factor MCU rows, except that the final
+   * iMCU row of the image holds only last_row_height of them.
+   */
+  if (cinfo->comps_in_scan > 1)
+    mcu_rows = 1;
+  else if (ctl->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+    mcu_rows = cinfo->cur_comp_info[0]->v_samp_factor;
+  else
+    mcu_rows = cinfo->cur_comp_info[0]->last_row_height;
+  ctl->MCU_rows_per_iMCU_row = mcu_rows;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  boolean have_arrays = (ctl->whole_image[0] != NULL);	/* full-image buffers exist? */
+
+  ctl->iMCU_row_num = 0;	/* every pass begins at the top iMCU row */
+  start_iMCU_row(cinfo);
+  switch (pass_mode) {		/* choose the compress_data method for this pass */
+  case JBUF_PASS_THRU:
+    if (have_arrays)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    ctl->pub.compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (! have_arrays)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    ctl->pub.compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (! have_arrays)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    ctl->pub.compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)	/* compress_data method installed for JBUF_PASS_THRU (single-pass) */
+compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;	/* rightmost MCU column */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* bottom iMCU row */
+  int blkn, bi, ci, yindex, yoffset, blockcnt;	/* blkn: next MCU_buffer slot; blockcnt: real blocks across */
+  JDIMENSION ypos, xpos;
+  jpeg_component_info *compptr;
+
+  /* Loop to write as much as one whole iMCU row, starting from the saved MCU_vert_offset / mcu_ctr */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks
+       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
+       * sequentially.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkn = 0;
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	xpos = MCU_col_num * compptr->MCU_sample_width;
+	ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+					 input_buf[compptr->component_index],
+					 coef->MCU_buffer[blkn],
+					 ypos, xpos, (JDIMENSION) blockcnt);
+	    if (blockcnt < compptr->MCU_width) {
+	      /* Right edge: zero the missing blocks, then give each the DC of its left neighbor. */
+	      jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
+			(compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
+	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+	      }
+	    }
+	  } else {
+	    /* Bottom edge: a zeroed dummy row whose DCs all copy the block just before the row. */
+	    jzero_far((void FAR *) coef->MCU_buffer[blkn],
+		      compptr->MCU_width * SIZEOF(JBLOCK));
+	    for (bi = 0; bi < compptr->MCU_width; bi++) {
+	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+	    }
+	  }
+	  blkn += compptr->MCU_width;	/* advance past this block row's slots */
+	  ypos += DCTSIZE;
+	}
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;	/* next call re-enters at this MCU row ... */
+	coef->mcu_ctr = MCU_col_num;	/* ... and at this (unwritten) MCU */
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)	/* compress_data method installed for JBUF_SAVE_AND_PASS */
+compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;			/* DC value copied into dummy blocks */
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;		/* this component's block rows for the current iMCU row */
+  JBLOCKROW thisblockrow, lastblockrow;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);	/* TRUE here, FALSE in compress_output -- presumably a "writable" flag; confirm in jmemmgr.c */
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;	/* exact multiple: the last row is full */
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int) (blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;	/* blocks needed to reach a multiple of h_samp_factor */
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+				   input_buf[ci], thisblockrow,
+				   (JDIMENSION) (block_row * DCTSIZE),
+				   (JDIMENSION) 0, blocks_across);
+      if (ndummy > 0) {
+	/* Create dummy blocks at the right edge of the image. */
+	thisblockrow += blocks_across; /* => first dummy block */
+	jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+	lastDC = thisblockrow[-1][0];	/* DC of the last real block in this row */
+	for (bi = 0; bi < ndummy; bi++) {
+	  thisblockrow[bi][0] = lastDC;
+	}
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (coef->iMCU_row_num == last_iMCU_row) {
+      blocks_across += ndummy;	/* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      for (block_row = block_rows; block_row < compptr->v_samp_factor;
+	   block_row++) {
+	thisblockrow = buffer[block_row];
+	lastblockrow = buffer[block_row-1];	/* the row above: real, or a dummy row filled on the previous iteration */
+	jzero_far((void FAR *) thisblockrow,
+		  (size_t) (blocks_across * SIZEOF(JBLOCK)));
+	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+	  lastDC = lastblockrow[h_samp_factor-1][0];	/* DC of the rightmost block of this MCU in the row above */
+	  for (bi = 0; bi < h_samp_factor; bi++) {
+	    thisblockrow[bi][0] = lastDC;
+	  }
+	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
+	  lastblockrow += h_samp_factor;
+	}
+      }
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)	/* compress_data method for JBUF_CRANK_DEST; also called at the end of compress_first_pass */
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;		/* first block column of the current MCU within a component */
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];	/* accessed block rows, one entry per scan component */
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);	/* FALSE here, TRUE in compress_first_pass -- presumably read-only access; confirm in jmemmgr.c */
+  }
+
+  /* Loop to process one whole iMCU row, starting from the saved MCU_vert_offset / mcu_ctr */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;	/* point at the block in place; nothing is copied */
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;	/* next call re-enters at this MCU row ... */
+	coef->mcu_ctr = MCU_col_num;	/* ... and at this (unwritten) MCU */
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr ctl;
+
+  ctl = (my_coef_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
+  ctl->pub.start_pass = start_pass_coef;
+  cinfo->coef = (struct jpeg_c_coef_controller *) ctl;
+
+  if (! need_full_buffer) {
+    /* Single-pass: one contiguous run of C_MAX_BLOCKS_IN_MCU blocks,
+     * with MCU_buffer[k] pointing at the k-th block of the run.
+     */
+    JBLOCKROW workspace;
+    int blk;
+
+    workspace = (JBLOCKROW) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (blk = 0; blk < C_MAX_BLOCKS_IN_MCU; blk++)
+      ctl->MCU_buffer[blk] = &workspace[blk];
+    ctl->whole_image[0] = NULL;	/* marks "no virtual arrays" */
+  } else {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Multi-pass: one full-image virtual block array per component, its
+     * dimensions rounded up to whole multiples of the sampling factors.
+     */
+    int ci;
+    jpeg_component_info *comp;
+    JDIMENSION padded_w, padded_h;
+
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      comp = &cinfo->comp_info[ci];
+      padded_w = (JDIMENSION) jround_up((long) comp->width_in_blocks,
+					(long) comp->h_samp_factor);
+      padded_h = (JDIMENSION) jround_up((long) comp->height_in_blocks,
+					(long) comp->v_samp_factor);
+      ctl->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, padded_w, padded_h,
+	 (JDIMENSION) comp->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  }
+}
diff --git a/JPEG/jccolor.cpp b/JPEG/jccolor.cpp
new file mode 100644
index 0000000..0a8a4b5
--- /dev/null
+++ b/JPEG/jccolor.cpp
@@ -0,0 +1,459 @@
+/*
+ * jccolor.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields; first member, since cinfo->cconvert is cast to my_cconvert_ptr */
+
+  /* Private state for RGB->YCC conversion */
+  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion; TABLE_SIZE entries, built by rgb_ycc_start */
+} my_color_converter;
+
+typedef my_color_converter * my_cconvert_ptr;	/* private view of cinfo->cconvert used in this file */
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
+ * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)	/* CENTERJSAMPLE scaled up by 2^SCALEBITS */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5 at the same scale, used for rounding */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* x * 2^SCALEBITS, rounded to nearest for x >= 0 */
+
+/* We allocate one big table and divide it up into eight parts, instead of
+ * doing eight alloc_small requests.  This lets us use a single table base
+ * address, which can be held in a register in the inner loops on many
+ * machines (more than can hold all eight addresses, anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* offset to B => Y section */
+#define R_CB_OFF	(3*(MAXJSAMPLE+1))	/* offset to R => Cb section */
+#define G_CB_OFF	(4*(MAXJSAMPLE+1))	/* offset to G => Cb section */
+#define B_CB_OFF	(5*(MAXJSAMPLE+1))	/* offset to B => Cb section */
+#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF	(6*(MAXJSAMPLE+1))	/* offset to G => Cr section */
+#define B_CR_OFF	(7*(MAXJSAMPLE+1))	/* offset to B => Cr section */
+#define TABLE_SIZE	(8*(MAXJSAMPLE+1))	/* total number of INT32 entries in the table */
+
+
+/*
+ * Initialize for RGB->YCC colorspace conversion.
+ */
+
+METHODDEF(void)
+rgb_ycc_start (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * tab;
+  INT32 v;			/* sample value being tabulated */
+
+  /* A single allocation holds every section (see the *_OFF macros). */
+  tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (TABLE_SIZE * SIZEOF(INT32)));
+  conv->rgb_ycc_tab = tab;
+
+  for (v = 0; v <= MAXJSAMPLE; v++) {
+    /* Y: the rounding term ONE_HALF is carried by the B section alone. */
+    tab[R_Y_OFF + v] = FIX(0.29900) * v;
+    tab[G_Y_OFF + v] = FIX(0.58700) * v;
+    tab[B_Y_OFF + v] = FIX(0.11400) * v     + ONE_HALF;
+    /* Cb: the B section carries the CENTERJSAMPLE offset plus a rounding
+     * term of ONE_HALF-1 (0.5-epsilon), so that the largest result rounds
+     * to MAXJSAMPLE rather than MAXJSAMPLE+1 and needs no range-limiting.
+     */
+    tab[R_CB_OFF + v] = (-FIX(0.16874)) * v;
+    tab[G_CB_OFF + v] = (-FIX(0.33126)) * v;
+    tab[B_CB_OFF + v] = FIX(0.50000) * v    + CBCR_OFFSET + ONE_HALF-1;
+    /* Cr: R_CR_OFF is the same section as B_CB_OFF (identical contents),
+     * so the R => Cr entries were already written by the line above.
+     */
+    tab[G_CR_OFF + v] = (-FIX(0.41869)) * v;
+    tab[B_CR_OFF + v] = (-FIX(0.08131)) * v;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.
+ * The input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The caller
+ * can easily adjust the passed input_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert (j_compress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		 JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * tab = conv->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  JDIMENSION x;
+  JSAMPROW px;			/* walks the interleaved input row */
+  JSAMPROW y_row, cb_row, cr_row;	/* one output row per plane */
+  int red, green, blue;
+  int row;
+
+  /* One input row feeds one row in each of the three output planes. */
+  for (row = 0; row < num_rows; row++) {
+    px = input_buf[row];
+    y_row  = output_buf[0][output_row + row];
+    cb_row = output_buf[1][output_row + row];
+    cr_row = output_buf[2][output_row + row];
+    for (x = 0; x < width; x++, px += RGB_PIXELSIZE) {
+      red   = GETJSAMPLE(px[RGB_RED]);
+      green = GETJSAMPLE(px[RGB_GREEN]);
+      blue  = GETJSAMPLE(px[RGB_BLUE]);
+      /* With inputs in 0..MAXJSAMPLE the results of these equations are
+       * in that range too, so no explicit range-limiting is performed.
+       * The value being shifted is therefore never negative, and a plain
+       * >> is used instead of the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      y_row[x] = (JSAMPLE)
+	((tab[red+R_Y_OFF] + tab[green+G_Y_OFF] + tab[blue+B_Y_OFF])
+	 >> SCALEBITS);
+      /* Cb */
+      cb_row[x] = (JSAMPLE)
+	((tab[red+R_CB_OFF] + tab[green+G_CB_OFF] + tab[blue+B_CB_OFF])
+	 >> SCALEBITS);
+      /* Cr */
+      cr_row[x] = (JSAMPLE)
+	((tab[red+R_CR_OFF] + tab[green+G_CR_OFF] + tab[blue+B_CR_OFF])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * tab = conv->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  JDIMENSION x;
+  JSAMPROW px;			/* walks the interleaved input row */
+  JSAMPROW gray_row;		/* output row in plane 0 */
+  int red, green, blue;
+  int row;
+
+  /* Only plane 0 of output_buf is written. */
+  for (row = 0; row < num_rows; row++) {
+    px = input_buf[row];
+    gray_row = output_buf[0][output_row + row];
+    for (x = 0; x < width; x++, px += RGB_PIXELSIZE) {
+      red   = GETJSAMPLE(px[RGB_RED]);
+      green = GETJSAMPLE(px[RGB_GREEN]);
+      blue  = GETJSAMPLE(px[RGB_BLUE]);
+      /* Gray is the Y equation alone; only the three Y sections are read. */
+      gray_row[x] = (JSAMPLE)
+	((tab[red+R_Y_OFF] + tab[green+G_Y_OFF] + tab[blue+B_Y_OFF])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles Adobe-style CMYK->YCCK conversion,
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * tab = conv->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  JDIMENSION x;
+  JSAMPROW px;			/* walks the 4-sample-per-pixel input row */
+  JSAMPROW y_row, cb_row, cr_row, k_row;	/* one output row per plane */
+  int red, green, blue;
+  int row;
+
+  /* One input row feeds one row in each of the four output planes. */
+  for (row = 0; row < num_rows; row++) {
+    px = input_buf[row];
+    y_row  = output_buf[0][output_row + row];
+    cb_row = output_buf[1][output_row + row];
+    cr_row = output_buf[2][output_row + row];
+    k_row  = output_buf[3][output_row + row];
+    for (x = 0; x < width; x++, px += 4) {
+      /* Invert the first three input samples to get R, G and B. */
+      red   = MAXJSAMPLE - GETJSAMPLE(px[0]);
+      green = MAXJSAMPLE - GETJSAMPLE(px[1]);
+      blue  = MAXJSAMPLE - GETJSAMPLE(px[2]);
+      k_row[x] = px[3];		/* K is copied unchanged; no GETJSAMPLE needed */
+      /* With inputs in 0..MAXJSAMPLE the results of these equations are
+       * in that range too, so no explicit range-limiting is performed.
+       * The value being shifted is therefore never negative, and a plain
+       * >> is used instead of the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      y_row[x] = (JSAMPLE)
+	((tab[red+R_Y_OFF] + tab[green+G_Y_OFF] + tab[blue+B_Y_OFF])
+	 >> SCALEBITS);
+      /* Cb */
+      cb_row[x] = (JSAMPLE)
+	((tab[red+R_CB_OFF] + tab[green+G_CB_OFF] + tab[blue+B_CB_OFF])
+	 >> SCALEBITS);
+      /* Cr */
+      cr_row[x] = (JSAMPLE)
+	((tab[red+R_CR_OFF] + tab[green+G_CR_OFF] + tab[blue+B_CR_OFF])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * Grayscale output needs no arithmetic: the first component of every
+ * input pixel is copied, so gray or YCbCr input (where Y == gray) both work.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  register JDIMENSION remaining;
+  JDIMENSION width = cinfo->image_width;
+  int pixel_step = cinfo->input_components;
+
+  for (; num_rows > 0; num_rows--, output_row++) {
+    src = *input_buf++;
+    dst = output_buf[0][output_row];
+    /* raw sample copy, so GETJSAMPLE() is not needed */
+    for (remaining = width; remaining > 0; remaining--) {
+      *dst++ = *src;
+      src += pixel_step;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * No conversion: interleaved pixels are just split into component planes.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert (j_compress_ptr cinfo,
+	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	      JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  register JDIMENSION left;
+  register int comp;
+  int ncomps = cinfo->num_components;
+  JDIMENSION width = cinfo->image_width;
+
+  while (num_rows-- > 0) {
+    /* One pass per component appears to be the fastest arrangement. */
+    for (comp = 0; comp < ncomps; comp++) {
+      src = *input_buf;
+      dst = output_buf[comp][output_row];
+      for (left = width; left > 0; left--) {
+	*dst++ = src[comp];	/* plain copy; GETJSAMPLE() not needed */
+	src += ncomps;
+      }
+    }
+    input_buf++;
+    output_row++;
+  }
+}
+
+
+/*
+ * Do-nothing start_pass method, for conversions that need no setup.
+ */
+
+METHODDEF(void)
+null_method (j_compress_ptr cinfo)
+{
+  (void) cinfo;			/* intentionally unused: nothing to set up */
+}
+
+
+/*
+ * Module initialization routine for input colorspace conversion: create the
+ * converter, check component counts, and select the conversion method. */
+
+GLOBAL(void)
+jinit_color_converter (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_converter));
+  cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3		/* RGB pixels are not 3 samples wide: check here */
+    if (cinfo->input_components != RGB_PIXELSIZE)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+#endif /* else share code with YCbCr */
+
+  case JCS_YCbCr:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+  }
+
+  /* Check num_components, set conversion method based on requested space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_GRAYSCALE)
+      cconvert->pub.color_convert = grayscale_convert;
+    else if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;	/* rgb_gray_convert assumes this has run */
+      cconvert->pub.color_convert = rgb_gray_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)	/* Y is already gray */
+      cconvert->pub.color_convert = grayscale_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)	/* copy only if 3 samples/pixel */
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+      cconvert->pub.start_pass = rgb_ycc_start;	/* cmyk_ycck_convert assumes this has run */
+      cconvert->pub.color_convert = cmyk_ycck_convert;
+    } else if (cinfo->in_color_space == JCS_YCCK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:			/* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+	cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+    break;
+  }
+}
diff --git a/JPEG/jcdctmgr.cpp b/JPEG/jcdctmgr.cpp
new file mode 100644
index 0000000..61fa79b
--- /dev/null
+++ b/JPEG/jcdctmgr.cpp
@@ -0,0 +1,387 @@
+/*
+ * jcdctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/* Private subobject for this module; cinfo->fdct points at one of these */
+
+typedef struct {
+  struct jpeg_forward_dct pub;	/* public fields (what cinfo->fdct is typed as) */
+
+  /* Pointer to the integer DCT routine actually in use */
+  forward_DCT_method_ptr do_dct;
+
+  /* The actual post-DCT divisors --- not identical to the quant table
+   * entries, because of scaling (especially for an unnormalized DCT).
+   * Each table is given in normal array order.  An entry stays NULL
+   * until start_pass_fdctmgr first builds the table for that slot. */
+  DCTELEM * divisors[NUM_QUANT_TBLS];
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case (reciprocals are stored). */
+  float_DCT_method_ptr do_float_dct;
+  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller * my_fdct_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up
+ * the divisor table for each one.
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+  DCTELEM * dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Compute divisors for this quant table */
+    /* We may do this more than once for same table, but it's not a big deal */
+    switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+    case JDCT_ISLOW:
+      /* For the LL&M (islow) DCT method, divisors are equal to raw
+       * quantization coefficients multiplied by 8 (to counteract scaling).
+       */
+      if (fdct->divisors[qtblno] == NULL) {	/* allocate on first use only */
+	fdct->divisors[qtblno] = (DCTELEM *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      DCTSIZE2 * SIZEOF(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;	/* i.e. times 8 */
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For the AA&N (ifast) DCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 */
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	if (fdct->divisors[qtblno] == NULL) {	/* allocate on first use only */
+	  fdct->divisors[qtblno] = (DCTELEM *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(DCTELEM));
+	}
+	dtbl = fdct->divisors[qtblno];
+	for (i = 0; i < DCTSIZE2; i++) {
+	  dtbl[i] = (DCTELEM)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-3);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For the float AA&N DCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 * What's actually stored is 1/divisor so that the inner loop can
+	 * use a multiplication rather than a division.
+	 */
+	FAST_FLOAT * fdtbl;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	if (fdct->float_divisors[qtblno] == NULL) {	/* allocate on first use only */
+	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(FAST_FLOAT));
+	}
+	fdtbl = fdct->float_divisors[qtblno];
+	i = 0;			/* i walks the table in row-major order */
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fdtbl[i] = (FAST_FLOAT)
+	      (1.0 / (((double) qtbl->quantval[i] *
+		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:			/* any dct_method whose case was compiled out */
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ * The divisor table used is the one for compptr->quant_tbl_no.
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_row/start_col, and moving to the right for any additional
+ * blocks. The quantized coefficients are returned in coef_blocks[].
+ */
+
+METHODDEF(void)
+forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+	     JDIMENSION start_row, JDIMENSION start_col,
+	     JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  forward_DCT_method_ptr do_dct = fdct->do_dct;
+  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register DCTELEM *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;	/* row elemr of this block */
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT (workspace is transformed in place) */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register DCTELEM temp, qval;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	qval = divisors[i];
+	temp = workspace[i];
+	/* Divide the coefficient value by qval, ensuring proper rounding.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 *
+	 * In most files, at least half of the output values will be zero
+	 * (at default quantization settings, more like three-quarters...)
+	 * so we should ensure that this case is fast.  On many machines,
+	 * a comparison is enough cheaper than a divide to make a special test
+	 * a win.  Since both inputs will be nonnegative, we need only test
+	 * for a < b to discover whether a/b is 0.
+	 * If your machine's division is fast enough, define FAST_DIVIDE.
+	 */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)	a /= b
+#else
+#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
+#endif
+	if (temp < 0) {
+	  temp = -temp;		/* divide the magnitude ... */
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	  temp = -temp;		/* ... then restore the sign */
+	} else {
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	}
+	output_ptr[i] = (JCOEF) temp;
+      }
+    }
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		   JDIMENSION start_row, JDIMENSION start_col,
+		   JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  float_DCT_method_ptr do_dct = fdct->do_float_dct;
+  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register FAST_FLOAT *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;	/* row elemr of this block */
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = (FAST_FLOAT)
+	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT (workspace is transformed in place) */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register FAST_FLOAT temp;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	/* Apply the quantization and scaling factor (a stored reciprocal) */
+	temp = workspace[i] * divisors[i];
+	/* Round to nearest integer.
+	 * Adding 16384.5 makes the value positive whatever temp's sign, so the
+	 * int cast rounds to nearest; the 16384 bias is then subtracted again.
+	 * The maximum coefficient size is +-16K (for 12-bit data), so this
+	 * code should work for either 16-bit or 32-bit ints.
+	 */
+	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+      }
+    }
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize FDCT manager: allocate it and bind the selected DCT method.
+ */
+
+GLOBAL(void)
+jinit_forward_dct (j_compress_ptr cinfo)
+{
+  my_fdct_ptr mgr = (my_fdct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_fdct_controller));
+  int slot;
+
+  cinfo->fdct = (struct jpeg_forward_dct *) mgr;
+  mgr->pub.start_pass = start_pass_fdctmgr;
+
+  /* Pair the block-processing wrapper with the raw DCT for this method */
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+    mgr->do_dct = jpeg_fdct_islow;
+    mgr->pub.forward_DCT = forward_DCT;
+    break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+    mgr->do_dct = jpeg_fdct_ifast;
+    mgr->pub.forward_DCT = forward_DCT;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+    mgr->do_float_dct = jpeg_fdct_float;
+    mgr->pub.forward_DCT = forward_DCT_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* No divisor tables exist yet */
+  for (slot = NUM_QUANT_TBLS - 1; slot >= 0; slot--) {
+    mgr->divisors[slot] = NULL;
+#ifdef DCT_FLOAT_SUPPORTED
+    mgr->float_divisors[slot] = NULL;
+#endif
+  }
+}
diff --git a/JPEG/jchuff.cpp b/JPEG/jchuff.cpp
new file mode 100644
index 0000000..4def0f7
--- /dev/null
+++ b/JPEG/jchuff.cpp
@@ -0,0 +1,909 @@
+/*
+* jchuff.c
+*
+* Copyright (C) 1991-1997, Thomas G. Lane.
+* This file is part of the Independent JPEG Group's software.
+* For conditions of distribution and use, see the accompanying README file.
+*
+* This file contains Huffman entropy encoding routines.
+*
+* Much of the complexity here has to do with supporting output suspension.
+* If the data destination module demands suspension, we want to be able to
+* back up to the start of the current MCU.  To do this, we copy state
+* variables into local working storage, and update them back to the
+* permanent JPEG objects only upon successful completion of an MCU.
+*/
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jcphuff.c */
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+*
+* The savable_state subrecord contains fields that change within an MCU,
+* but must not be updated permanently until we complete the MCU.
+*/
+
+typedef struct {
+	INT32 put_buffer;		/* current bit-accumulation buffer */
+	int put_bits;			/* # of bits now in it (emit_bits leaves fewer than 8) */
+	int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* ASSIGN_STATE copies one savable_state into another.  The macro form is
+* to work around compilers with missing or broken structure assignment.
+* The member-by-member fallback exists only for MAX_COMPS_IN_SCAN == 4;
+* you'll need to fix this code if you need it with any other value. */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).put_buffer = (src).put_buffer, \
+	(dest).put_bits = (src).put_bits, \
+	(dest).last_dc_val[0] = (src).last_dc_val[0], \
+	(dest).last_dc_val[1] = (src).last_dc_val[1], \
+	(dest).last_dc_val[2] = (src).last_dc_val[2], \
+(dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif /* MAX_COMPS_IN_SCAN == 4 */
+#endif /* NO_STRUCT_ASSIGN */
+
+
+typedef struct {
+	struct jpeg_entropy_encoder pub; /* public fields (what cinfo->entropy is typed as) */
+	
+	savable_state saved;		/* Bit buffer & DC state at start of MCU */
+	
+	/* These fields are NOT loaded into local working state. */
+	unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+	int next_restart_num;		/* next restart number to write (0-7) */
+	
+	/* Pointers to derived tables (these workspaces have image lifespan) */
+	c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+	c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+	
+#ifdef ENTROPY_OPT_SUPPORTED	/* Statistics tables for optimization */
+	long * dc_count_ptrs[NUM_HUFF_TBLS];	/* 257 counters each, see start_pass_huff */
+	long * ac_count_ptrs[NUM_HUFF_TBLS];
+#endif
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder * huff_entropy_ptr;	/* cinfo->entropy is cast to this */
+
+/* Working state while writing an MCU.
+* This struct contains all the fields that are needed by subroutines
+* (emit_byte, dump_buffer, emit_bits and friends all take a pointer to it). */
+
+typedef struct {
+	JOCTET * next_output_byte;	/* => next byte to write in buffer */
+	size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+	savable_state cur;		/* Current bit buffer & DC state */
+	j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+} working_state;
+
+
+/* Forward declarations */
+METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
+									   JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
+#ifdef ENTROPY_OPT_SUPPORTED
+METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
+										 JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
+#endif
+
+
+/*
+* Initialize for a Huffman-compressed scan.
+* If gather_statistics is TRUE, we do not output anything during the scan,
+* just count the Huffman symbols used and generate Huffman code tables.
+* Either way the bit buffer, DC predictions and restart state are reset. */
+
+METHODDEF(void)
+start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
+{
+	huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+	int ci, dctbl, actbl;
+	jpeg_component_info * compptr;
+	
+	/* Select the per-MCU and end-of-pass methods for this kind of pass */
+	if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+		entropy->pub.encode_mcu = encode_mcu_gather;
+		entropy->pub.finish_pass = finish_pass_gather;
+#else
+		ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+	} else {
+		entropy->pub.encode_mcu = encode_mcu_huff;
+		entropy->pub.finish_pass = finish_pass_huff;
+	}
+	
+	for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+		compptr = cinfo->cur_comp_info[ci];
+		dctbl = compptr->dc_tbl_no;
+		actbl = compptr->ac_tbl_no;
+		if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+			/* Check for invalid table indexes */
+			/* (make_c_derived_tbl does this in the other path) */
+			if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
+				ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+			if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
+				ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+			/* Allocate and zero the statistics tables */
+			/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+			if (entropy->dc_count_ptrs[dctbl] == NULL)
+				entropy->dc_count_ptrs[dctbl] = (long *)
+				(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				257 * SIZEOF(long));
+			MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long));
+			if (entropy->ac_count_ptrs[actbl] == NULL)
+				entropy->ac_count_ptrs[actbl] = (long *)
+				(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				257 * SIZEOF(long));
+			MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long));
+#endif
+		} else {
+			/* Compute derived values for Huffman tables */
+			/* We may do this more than once for a table, but it's not expensive */
+			jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
+				& entropy->dc_derived_tbls[dctbl]);
+			jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
+				& entropy->ac_derived_tbls[actbl]);
+		}
+		/* Initialize DC predictions to 0 */
+		entropy->saved.last_dc_val[ci] = 0;
+	}
+	
+	/* Initialize bit buffer to empty */
+	entropy->saved.put_buffer = 0;
+	entropy->saved.put_bits = 0;
+	
+	/* Initialize restart stuff */
+	entropy->restarts_to_go = cinfo->restart_interval;	/* a full interval remains */
+	entropy->next_restart_num = 0;
+}
+
+
+/*
+* Compute the derived values for a Huffman table.
+* This routine also performs some validation checks on the table.
+* isDC picks the DC or AC table set; *pdtbl is allocated here if still NULL.
+* Note this is also used by jcphuff.c.
+*/
+
+GLOBAL(void)
+jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
+						 c_derived_tbl ** pdtbl)
+{
+	JHUFF_TBL *htbl;
+	c_derived_tbl *dtbl;
+	int p, i, l, lastp, si, maxsymbol;
+	char huffsize[257];		/* up to 256 code lengths plus a 0 terminator */
+	unsigned int huffcode[257];
+	unsigned int code;
+	
+	/* Note that huffsize[] and huffcode[] are filled in code-length order,
+	* paralleling the order of the symbols themselves in htbl->huffval[].
+	*/
+	
+	/* Find the input Huffman table */
+	if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+		ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+	htbl =
+		isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+	if (htbl == NULL)
+		ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+	
+	/* Allocate a workspace if we haven't already done so. */
+	if (*pdtbl == NULL)
+		*pdtbl = (c_derived_tbl *)
+		(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		SIZEOF(c_derived_tbl));
+	dtbl = *pdtbl;
+	
+	/* Figure C.1: make table of Huffman code length for each symbol */
+	
+	p = 0;
+	for (l = 1; l <= 16; l++) {
+		i = (int) htbl->bits[l];	/* number of codes of length l */
+		if (i < 0 || p + i > 256)	/* protect against table overrun */
+			ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+		while (i--)
+			huffsize[p++] = (char) l;
+	}
+	huffsize[p] = 0;		/* zero length terminates the list */
+	lastp = p;			/* total number of symbols in the table */
+	
+	/* Figure C.2: generate the codes themselves */
+	/* We also validate that the counts represent a legal Huffman code tree. */
+	
+	code = 0;
+	si = huffsize[0];
+	p = 0;
+	while (huffsize[p]) {
+		while (((int) huffsize[p]) == si) {
+			huffcode[p++] = code;
+			code++;
+		}
+		/* code is now 1 more than the last code used for codelength si; but
+		* it must still fit in si bits, since no code is allowed to be all ones.
+		*/
+		if (((INT32) code) >= (((INT32) 1) << si))
+			ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+		code <<= 1;		/* move on to the next code length */
+		si++;
+	}
+	
+	/* Figure C.3: generate encoding tables */
+	/* These are code and size indexed by symbol value */
+	
+	/* Set all codeless symbols to have code length 0;
+	* this lets us detect duplicate VAL entries here, and later
+	* allows emit_bits to detect any attempt to emit such symbols.
+	*/
+	MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+	
+	/* This is also a convenient place to check for out-of-range
+	* and duplicated VAL entries.  We allow 0..255 for AC symbols
+	* but only 0..15 for DC.  (We could constrain them further
+	* based on data depth and mode, but this seems enough.)
+	*/
+	maxsymbol = isDC ? 15 : 255;
+	
+	for (p = 0; p < lastp; p++) {
+		i = htbl->huffval[p];	/* symbol value at position p */
+		if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])	/* nonzero size = duplicate */
+			ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+		dtbl->ehufco[i] = huffcode[p];
+		dtbl->ehufsi[i] = huffsize[p];
+	}
+}
+
+
+/* Outputting bytes to the file */
+
+/* Emit a byte; if that fills the buffer, dump it and take 'action' on suspend. */
+#define emit_byte(state,val,action)  \
+{ *(state)->next_output_byte++ = (JOCTET) (val);  \
+	if (--(state)->free_in_buffer == 0)  \
+	if (! dump_buffer(state))  \
+{ action; } }
+
+
+LOCAL(boolean)
+dump_buffer (working_state * state)
+/* Flush the full output buffer and resync; TRUE on success, FALSE to suspend */
+{
+	struct jpeg_destination_mgr * sink = state->cinfo->dest;
+	
+	if ((*sink->empty_output_buffer) (state->cinfo)) {
+		state->next_output_byte = sink->next_output_byte;
+		state->free_in_buffer = sink->free_in_buffer;
+		return TRUE;
+	}
+	return FALSE;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+* left-justified in this part.  At most 16 bits can be passed to emit_bits
+* in one call, and we never retain more than 7 bits in put_buffer
+* between calls, so 24 bits are sufficient.
+*/
+
+//INLINE
+LOCAL(boolean)
+emit_bits (working_state * state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+	/* This routine is heavily used, so it's worth coding tightly. */
+	register INT32 put_buffer = (INT32) code;
+	register int put_bits = state->cur.put_bits;
+	
+	/* if size is 0, caller used an invalid Huffman table entry */
+	if (size == 0)
+		ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+	
+	put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+	
+	put_bits += size;		/* new number of bits in buffer */
+	
+	put_buffer <<= 24 - put_bits; /* align incoming bits */
+	
+	put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
+	
+	/* Write out every complete byte, most significant first */
+	while (put_bits >= 8) {
+		int c = (int) ((put_buffer >> 16) & 0xFF);	/* top byte of the 24-bit field */
+		
+		emit_byte(state, c, return FALSE);
+		if (c == 0xFF) {		/* need to stuff a zero byte? */
+			emit_byte(state, 0, return FALSE);
+		}
+		put_buffer <<= 8;
+		put_bits -= 8;
+	}
+	
+	state->cur.put_buffer = put_buffer; /* update state variables */
+	state->cur.put_bits = put_bits;
+	
+	return TRUE;
+}
+
+
+LOCAL(boolean)
+flush_bits (working_state * state)
+{
+	boolean ok = emit_bits(state, 0x7F, 7);	/* pad any partial byte with ones */
+	
+	if (ok)				/* flushed: mark the bit buffer empty */
+		state->cur.put_buffer = state->cur.put_bits = 0;
+	return ok;
+}
+
+
+/* Encode a single block's worth of coefficients */
+
+LOCAL(boolean)
+encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
+				  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+/* block[0] is coded as its difference from last_dc_val using dctbl, the other
+* coefficients using actbl.  Returns TRUE, or FALSE if output must suspend. */
+{
+	register int temp, temp2;
+	register int nbits;
+	register int k, r, i;
+	
+	/* Encode the DC coefficient difference per section F.1.2.1 */
+	temp = temp2 = block[0] - last_dc_val;
+	if (temp < 0) {
+		temp = -temp;		/* temp is abs value of input */
+		/* For a negative input, want temp2 = bitwise complement of abs(input) */
+		/* This code assumes we are on a two's complement machine */
+		temp2--;
+	}
+	
+	/* Find the number of bits needed for the magnitude of the coefficient */
+	nbits = 0;
+	while (temp) {
+		nbits++;
+		temp >>= 1;
+	}
+	/* Check for out-of-range coefficient values.
+	* Since we're encoding a difference, the range limit is twice as much.
+	*/
+	if (nbits > MAX_COEF_BITS+1)
+		ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+	
+	/* Emit the Huffman-coded symbol for the number of bits */
+	if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+		return FALSE;
+	
+	/* Emit that number of bits of the value, if positive, */
+	/* or the complement of its magnitude, if negative. */
+	if (nbits) {			/* emit_bits rejects calls with size 0 */
+		if (! emit_bits(state, (unsigned int) temp2, nbits))
+			return FALSE;
+	}
+	
+	/* Encode the AC coefficients per section F.1.2.2 */
+	r = 0;			/* r = run length of zeros */
+	
+	for (k = 1; k < DCTSIZE2; k++) {
+		if ((temp = block[jpeg_natural_order[k]]) == 0) {
+			r++;
+		} else {
+			/* if run length > 15, must emit special run-length-16 codes (0xF0) */
+			while (r > 15) {
+				if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
+					return FALSE;
+				r -= 16;
+			}
+			
+			temp2 = temp;
+			if (temp < 0) {
+				temp = -temp;		/* temp is abs value of input */
+				/* This code assumes we are on a two's complement machine */
+				temp2--;
+			}
+			
+			/* Find the number of bits needed for the magnitude of the coefficient */
+			nbits = 1;		/* there must be at least one 1 bit */
+			while ((temp >>= 1))
+				nbits++;
+			/* Check for out-of-range coefficient values */
+			if (nbits > MAX_COEF_BITS)
+				ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+			
+			/* Emit Huffman symbol for run length / number of bits */
+			i = (r << 4) + nbits;
+			if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
+				return FALSE;
+			
+			/* Emit that number of bits of the value, if positive, */
+			/* or the complement of its magnitude, if negative. */
+			if (! emit_bits(state, (unsigned int) temp2, nbits))
+				return FALSE;
+			
+			r = 0;
+		}
+	}
+	
+	/* If the last coef(s) were zero, emit an end-of-block code */
+	if (r > 0) {
+		if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
+			return FALSE;
+	}
+	return TRUE;
+}
+
+
+/*
+* Emit a restart marker & resynchronize predictions.
+*/
+
+LOCAL(boolean)
+emit_restart (working_state * state, int restart_num)
+{
+	int comp = 0;
+	
+	/* Pending bits must be padded out and written ahead of the marker. */
+	if (! flush_bits(state))
+		return FALSE;
+	
+	/* The marker itself: 0xFF, then JPEG_RST0 offset by restart_num. */
+	emit_byte(state, 0xFF, return FALSE);
+	emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
+	
+	/* Every component in the scan restarts with a DC prediction of 0. */
+	while (comp < state->cinfo->comps_in_scan)
+		state->cur.last_dc_val[comp++] = 0;
+	/* The restart counter is left alone here: it is advanced only once */
+	/* the MCU has been written successfully. */
+	return TRUE;
+}
+
+
+/*
+* Encode and output one MCU's worth of Huffman-compressed coefficients.
+*/
+
+METHODDEF(boolean)
+encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+	huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+	working_state state;
+	int blkn, ci;
+	jpeg_component_info * compptr;
+	/* Returns FALSE if the restart marker or a block could not be emitted; on
+	 * those paths nothing has been copied back from the local working state. */
+	state.next_output_byte = cinfo->dest->next_output_byte;
+	state.free_in_buffer = cinfo->dest->free_in_buffer;
+	ASSIGN_STATE(state.cur, entropy->saved);
+	state.cinfo = cinfo;
+	
+	/* Emit restart marker if an interval is set and it has counted down to 0 */
+	if (cinfo->restart_interval) {
+		if (entropy->restarts_to_go == 0)
+			if (! emit_restart(&state, entropy->next_restart_num))
+				return FALSE;
+	}
+	
+	/* Encode the MCU data blocks, each with its component's DC and AC tables */
+	for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+		ci = cinfo->MCU_membership[blkn];
+		compptr = cinfo->cur_comp_info[ci];
+		if (! encode_one_block(&state,
+			MCU_data[blkn][0], state.cur.last_dc_val[ci],
+			entropy->dc_derived_tbls[compptr->dc_tbl_no],
+			entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+			return FALSE;
+		/* Update last_dc_val: this block's DC term is the next predictor for ci */
+		state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+	}
+	
+	/* Completed MCU, so copy the working state back to dest and entropy->saved */
+	cinfo->dest->next_output_byte = state.next_output_byte;
+	cinfo->dest->free_in_buffer = state.free_in_buffer;
+	ASSIGN_STATE(entropy->saved, state.cur);
+	
+	/* Update restart-interval state too; next_restart_num wraps within 0..7 */
+	if (cinfo->restart_interval) {
+		if (entropy->restarts_to_go == 0) {
+			entropy->restarts_to_go = cinfo->restart_interval;
+			entropy->next_restart_num++;
+			entropy->next_restart_num &= 7;
+		}
+		entropy->restarts_to_go--;
+	}
+	
+	return TRUE;
+}
+
+
+/*
+* Finish up at the end of a Huffman-compressed scan.
+*/
+
+METHODDEF(void)
+finish_pass_huff (j_compress_ptr cinfo)
+{
+	working_state ws;
+	huff_entropy_ptr huff = (huff_entropy_ptr) cinfo->entropy;
+	
+	/* flush_bits operates on a working_state, so build one from saved state */
+	ws.cinfo = cinfo;
+	ASSIGN_STATE(ws.cur, huff->saved);
+	ws.free_in_buffer = cinfo->dest->free_in_buffer;
+	ws.next_output_byte = cinfo->dest->next_output_byte;
+	
+	/* Write out the remaining bits; a failure here is fatal */
+	if (! flush_bits(&ws))
+		ERREXIT(cinfo, JERR_CANT_SUSPEND);
+	
+	/* Copy the output position and bit state back */
+	ASSIGN_STATE(huff->saved, ws.cur);
+	cinfo->dest->free_in_buffer = ws.free_in_buffer;
+	cinfo->dest->next_output_byte = ws.next_output_byte;
+}
+
+
+/*
+* Huffman coding optimization.
+*
+* We first scan the supplied data and count the number of uses of each symbol
+* that is to be Huffman-coded. (This process MUST agree with the code above.)
+* Then we build a Huffman coding tree for the observed counts.
+* Symbols which are not needed at all for the particular image are not
+* assigned any code, which saves space in the DHT marker as well as in
+* the compressed data.
+*/
+
+#ifdef ENTROPY_OPT_SUPPORTED
+
+
+/* Process a single block's worth of coefficients */
+
+LOCAL(void)
+htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+				 long dc_counts[], long ac_counts[])
+{
+	register int temp;
+	register int nbits;
+	register int k, r;
+	/* Walks the block like encode_one_block but only tallies symbol usage. */
+	/* Classify the DC coefficient difference per section F.1.2.1 */
+	
+	temp = block[0] - last_dc_val;
+	if (temp < 0)
+		temp = -temp;
+	
+	/* Find the number of bits needed for the magnitude of the coefficient */
+	nbits = 0;
+	while (temp) {
+		nbits++;
+		temp >>= 1;
+	}
+	/* Check for out-of-range coefficient values.
+	* Since we're encoding a difference, the range limit is twice as much.
+	*/
+	if (nbits > MAX_COEF_BITS+1)
+		ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+	
+	/* Count the Huffman symbol for the number of bits */
+	dc_counts[nbits]++;
+	
+	/* Classify the AC coefficients per section F.1.2.2 */
+	
+	r = 0;			/* r = run length of zeros */
+	
+	for (k = 1; k < DCTSIZE2; k++) {
+		if ((temp = block[jpeg_natural_order[k]]) == 0) {
+			r++;
+		} else {
+			/* if run length > 15, must count special run-length-16 codes (0xF0) */
+			while (r > 15) {
+				ac_counts[0xF0]++;
+				r -= 16;
+			}
+			
+			/* Only the magnitude selects the symbol, so take the absolute value */
+			if (temp < 0)
+				temp = -temp;
+			
+			/* Find the number of bits needed for the magnitude of the coefficient */
+			nbits = 1;		/* there must be at least one 1 bit */
+			while ((temp >>= 1))
+				nbits++;
+			/* Check for out-of-range coefficient values */
+			if (nbits > MAX_COEF_BITS)
+				ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+			
+			/* Count Huffman symbol for run length / number of bits */
+			ac_counts[(r << 4) + nbits]++;
+			
+			r = 0;
+		}
+	}
+	
+	/* If the last coef(s) were zero, count an end-of-block code */
+	if (r > 0)
+		ac_counts[0]++;
+}
+
+
+/*
+* Trial-encode one MCU's worth of Huffman-compressed coefficients.
+* No data is actually output, so no suspension return is possible.
+*/
+
+METHODDEF(boolean)
+encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+	huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+	int blkn, ci;
+	jpeg_component_info * compptr;
+	/* Always returns TRUE; the results are the updated dc/ac count arrays. */
+	/* Take care of restart intervals if needed (no marker is written here) */
+	if (cinfo->restart_interval) {
+		if (entropy->restarts_to_go == 0) {
+			/* Re-initialize DC predictions to 0 */
+			for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+				entropy->saved.last_dc_val[ci] = 0;
+			/* Update restart state */
+			entropy->restarts_to_go = cinfo->restart_interval;
+		}
+		entropy->restarts_to_go--;
+	}
+	/* Tally the symbols of every block in the MCU against its component's tables */
+	for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+		ci = cinfo->MCU_membership[blkn];
+		compptr = cinfo->cur_comp_info[ci];
+		htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+			entropy->dc_count_ptrs[compptr->dc_tbl_no],
+			entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+		entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];	/* next DC predictor */
+	}
+	
+	return TRUE;
+}
+
+
+/*
+* Generate the best Huffman code table for the given counts, fill htbl.
+* Note this is also used by jcphuff.c.
+*
+* The JPEG standard requires that no symbol be assigned a codeword of all
+* one bits (so that padding bits added at the end of a compressed segment
+* can't look like a valid code).  Because of the canonical ordering of
+* codewords, this just means that there must be an unused slot in the
+* longest codeword length category.  Section K.2 of the JPEG spec suggests
+* reserving such a slot by pretending that symbol 256 is a valid symbol
+* with count 1.  In theory that's not optimal; giving it count zero but
+* including it in the symbol set anyway should give a better Huffman code.
+* But the theoretically better code actually seems to come out worse in
+* practice, because it produces more all-ones bytes (which incur stuffed
+* zero bytes in the final file).  In any case the difference is tiny.
+*
+* The JPEG standard requires Huffman codes to be no more than 16 bits long.
+* If some symbols have a very small but nonzero probability, the Huffman tree
+* must be adjusted to meet the code length restriction.  We currently use
+* the adjustment method suggested in JPEG section K.2.  This method is *not*
+* optimal; it may not choose the best possible limited-length code.  But
+* typically only very-low-frequency symbols will be given less-than-optimal
+* lengths, so the code is almost optimal.  Experimental comparisons against
+* an optimal limited-length-code algorithm indicate that the difference is
+* microscopic --- usually less than a hundredth of a percent of total size.
+* So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+*/
+
+GLOBAL(void)
+jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
+{
+#define MAX_CLEN 32		/* assumed maximum initial code length */
+	UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
+	int codesize[257];		/* codesize[k] = code length of symbol k */
+	int others[257];		/* next symbol in current branch of tree */
+	int c1, c2;
+	int p, i, j;
+	long v;
+	/* freq[] must have 257 entries (slot 256 is written) and is clobbered below. */
+	/* This algorithm is explained in section K.2 of the JPEG standard */
+	
+	MEMZERO(bits, SIZEOF(bits));
+	MEMZERO(codesize, SIZEOF(codesize));
+	for (i = 0; i < 257; i++)
+		others[i] = -1;		/* init links to empty */
+	
+	freq[256] = 1;		/* make sure 256 has a nonzero count */
+	/* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+	* that no real symbol is given code-value of all ones, because 256
+	* will be placed last in the largest codeword category.
+	*/
+	
+	/* Huffman's basic algorithm to assign optimal code lengths to symbols */
+	
+	for (;;) {
+		/* Find the smallest nonzero frequency, set c1 = its symbol */
+		/* In case of ties, take the larger symbol number */
+		c1 = -1;
+		v = 1000000000L;
+		for (i = 0; i <= 256; i++) {
+			if (freq[i] && freq[i] <= v) {
+				v = freq[i];
+				c1 = i;
+			}
+		}
+		
+		/* Find the next smallest nonzero frequency, set c2 = its symbol */
+		/* In case of ties, take the larger symbol number */
+		c2 = -1;
+		v = 1000000000L;
+		for (i = 0; i <= 256; i++) {
+			if (freq[i] && freq[i] <= v && i != c1) {
+				v = freq[i];
+				c2 = i;
+			}
+		}
+		
+		/* Done if we've merged everything into one frequency */
+		if (c2 < 0)
+			break;
+		
+		/* Else merge the two counts/trees */
+		freq[c1] += freq[c2];
+		freq[c2] = 0;
+		
+		/* Increment the codesize of everything in c1's tree branch */
+		codesize[c1]++;
+		while (others[c1] >= 0) {
+			c1 = others[c1];
+			codesize[c1]++;
+		}
+		
+		others[c1] = c2;		/* chain c2 onto c1's tree branch */
+		
+		/* Increment the codesize of everything in c2's tree branch */
+		codesize[c2]++;
+		while (others[c2] >= 0) {
+			c2 = others[c2];
+			codesize[c2]++;
+		}
+	}
+	
+	/* Now count the number of symbols of each code length */
+	for (i = 0; i <= 256; i++) {
+		if (codesize[i]) {
+			/* The JPEG standard seems to think that this can't happen, */
+			/* but I'm paranoid... */
+			if (codesize[i] > MAX_CLEN)
+				ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+			
+			bits[codesize[i]]++;
+		}
+	}
+	
+	/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+	* Huffman procedure assigned any such lengths, we must adjust the coding.
+	* Here is what the JPEG spec says about how this next bit works:
+	* Since symbols are paired for the longest Huffman code, the symbols are
+	* removed from this length category two at a time.  The prefix for the pair
+	* (which is one bit shorter) is allocated to one of the pair; then,
+	* skipping the BITS entry for that prefix length, a code word from the next
+	* shortest nonzero BITS entry is converted into a prefix for two code words
+	* one bit longer.
+	*/
+	
+	for (i = MAX_CLEN; i > 16; i--) {
+		while (bits[i] > 0) {
+			j = i - 2;		/* find length of new prefix to be used */
+			while (j > 0 && bits[j] == 0)	/* bits[0] is never used: stop there */
+				j--;
+			if (j == 0) ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);	/* nothing shorter to split */
+			bits[i] -= 2;		/* remove two symbols */
+			bits[i-1]++;		/* one goes in this length */
+			bits[j+1] += 2;		/* two new symbols in this length */
+			bits[j]--;		/* symbol of this length is now a prefix */
+		}
+	}
+	
+	/* Remove the count for the pseudo-symbol 256 from the largest codelength */
+	while (i > 0 && bits[i] == 0)	/* find largest codelength still in use */
+		i--;
+	if (i > 0)			/* i == 0: no symbol received a code, nothing to remove */
+		bits[i]--;
+	/* Return final symbol counts (only for lengths 0..16) */
+	MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+	
+	/* Return a list of the symbols sorted by code length */
+	/* It's not real clear to me why we don't need to consider the codelength
+	* changes made above, but the JPEG spec seems to think this works.
+	*/
+	p = 0;
+	for (i = 1; i <= MAX_CLEN; i++) {
+		for (j = 0; j <= 255; j++) {
+			if (codesize[j] == i) {
+				htbl->huffval[p] = (UINT8) j;
+				p++;
+			}
+		}
+	}
+	
+	/* Set sent_table FALSE so updated table will be written to JPEG file. */
+	htbl->sent_table = FALSE;
+}
+
+
+/*
+* Finish up a statistics-gathering pass and create the new Huffman tables.
+*/
+
+METHODDEF(void)
+finish_pass_gather (j_compress_ptr cinfo)
+{
+	huff_entropy_ptr huff = (huff_entropy_ptr) cinfo->entropy;
+	boolean dc_done[NUM_HUFF_TBLS];
+	boolean ac_done[NUM_HUFF_TBLS];
+	jpeg_component_info * comp;
+	JHUFF_TBL **slot;
+	int n, dc_no, ac_no;
+	
+	/* jpeg_gen_optimal_table destroys the frequency counts it is handed, so
+	* a table shared by several components must be generated only once.
+	*/
+	MEMZERO(dc_done, SIZEOF(dc_done));
+	MEMZERO(ac_done, SIZEOF(ac_done));
+	
+	for (n = 0; n < cinfo->comps_in_scan; n++) {
+		comp = cinfo->cur_comp_info[n];
+		ac_no = comp->ac_tbl_no;
+		dc_no = comp->dc_tbl_no;
+		if (! dc_done[dc_no]) {
+			slot = & cinfo->dc_huff_tbl_ptrs[dc_no];
+			if (*slot == NULL)	/* table object not allocated yet */
+				*slot = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+			jpeg_gen_optimal_table(cinfo, *slot, huff->dc_count_ptrs[dc_no]);
+			dc_done[dc_no] = TRUE;
+		}
+		if (! ac_done[ac_no]) {
+			slot = & cinfo->ac_huff_tbl_ptrs[ac_no];
+			if (*slot == NULL)	/* table object not allocated yet */
+				*slot = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+			jpeg_gen_optimal_table(cinfo, *slot, huff->ac_count_ptrs[ac_no]);
+			ac_done[ac_no] = TRUE;
+		}
+	}
+}
+
+
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+* Module initialization routine for Huffman entropy encoding.
+*/
+
+GLOBAL(void)
+jinit_huff_encoder (j_compress_ptr cinfo)
+{
+	int tbl = NUM_HUFF_TBLS;
+	huff_entropy_ptr huff = (huff_entropy_ptr)
+		(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		SIZEOF(huff_entropy_encoder));
+	
+	/* Hook up the start_pass method and publish the object in cinfo */
+	huff->pub.start_pass = start_pass_huff;
+	cinfo->entropy = (struct jpeg_entropy_encoder *) huff;
+	
+	/* No derived table or count array exists yet: NULL every slot */
+	while (tbl-- > 0) {
+		huff->ac_derived_tbls[tbl] = huff->dc_derived_tbls[tbl] = NULL;
+#ifdef ENTROPY_OPT_SUPPORTED
+		huff->ac_count_ptrs[tbl] = huff->dc_count_ptrs[tbl] = NULL;
+#endif
+	}
+}
diff --git a/JPEG/jchuff.h b/JPEG/jchuff.h
new file mode 100644
index 0000000..a9599fc
--- /dev/null
+++ b/JPEG/jchuff.h
@@ -0,0 +1,47 @@
+/*
+ * jchuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy encoding routines
+ * that are shared between the sequential encoder (jchuff.c) and the
+ * progressive encoder (jcphuff.c).  No other modules need to see these.
+ */
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit data;
+ * -16384 .. +16383 for 12-bit data.
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MAX_COEF_BITS 10
+#else
+#define MAX_COEF_BITS 14
+#endif
+
+/* Derived data constructed for each Huffman table */
+
+typedef struct {
+  unsigned int ehufco[256];	/* code for each symbol, indexed by symbol value */
+  char ehufsi[256];		/* length of code for each symbol, same indexing */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_c_derived_tbl	jMkCDerived
+#define jpeg_gen_optimal_table	jGenOptTbl
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_c_derived_tbl
+	JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
+	     c_derived_tbl ** pdtbl));
+
+/* Generate an optimal table definition given the specified counts */
+EXTERN(void) jpeg_gen_optimal_table
+	JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
diff --git a/JPEG/jcinit.cpp b/JPEG/jcinit.cpp
new file mode 100644
index 0000000..5efffe3
--- /dev/null
+++ b/JPEG/jcinit.cpp
@@ -0,0 +1,72 @@
+/*
+ * jcinit.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master (j_compress_ptr cinfo)
+{
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* Preprocessing modules; not set up when cinfo->raw_data_in is set */
+  if (! cinfo->raw_data_in) {
+    jinit_color_converter(cinfo);
+    jinit_downsampler(cinfo);
+    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+  }
+  /* Forward DCT */
+  jinit_forward_dct(cinfo);
+  /* Entropy encoding: Huffman only; a request for arithmetic coding is an error. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);	/* progressive encoder was compiled out */
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* Need a full-image coefficient buffer in any multi-pass mode. */
+  jinit_c_coef_controller(cinfo,
+		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/JPEG/jcmainct.cpp b/JPEG/jcmainct.cpp
new file mode 100644
index 0000000..e0279a7
--- /dev/null
+++ b/JPEG/jcmainct.cpp
@@ -0,0 +1,293 @@
+/*
+ * jcmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Note: currently, there is no operating mode in which a full-image buffer
+ * is needed at this step.  If there were, that mode could not be used with
+ * "raw data" input, since this module is bypassed in that case.  However,
+ * we've left the code here for possible use in special applications.
+ */
+#undef FULL_MAIN_BUFFER_SUPPORTED
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields */
+  /* Per-pass progress; these four fields are (re)set by start_pass_main. */
+  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
+  boolean suspended;		/* TRUE while *in_row_ctr is held back after a suspension */
+  J_BUF_MODE pass_mode;		/* current operating mode */
+
+  /* If using just a strip buffer, this points to the entire set of buffers
+   * (we allocate one for each component).  In the full-image case, this
+   * points to the currently accessible strips of the virtual arrays.
+   */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  /* If using full-image storage, this array holds pointers to virtual-array
+   * control blocks for each component.  Unused if not full-image storage.
+   */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+METHODDEF(void) process_data_buffer_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Raw-data input bypasses this controller: nothing to set up. */
+  if (cinfo->raw_data_in)
+    return;
+
+  mainp->pass_mode = pass_mode;	/* process_data consults this later */
+  mainp->suspended = FALSE;
+  mainp->rowgroup_ctr = 0;	/* both counters start from zero */
+  mainp->cur_iMCU_row = 0;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    if (mainp->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+    mainp->pub.process_data = process_data_simple_main;
+    break;
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  case JBUF_SAVE_SOURCE:
+  case JBUF_CRANK_DEST:
+  case JBUF_SAVE_AND_PASS:
+    if (mainp->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    mainp->pub.process_data = process_data_buffer_main;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  for (; mainp->cur_iMCU_row < cinfo->total_iMCU_rows; mainp->cur_iMCU_row++) {
+    /* Top up the strip buffer unless it already holds DCTSIZE row groups */
+    if (mainp->rowgroup_ctr < DCTSIZE)
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+
+    /* An incomplete iMCU row means the input ran dry: hand control back to
+     * the application so it can supply more.  (At the bottom of the image
+     * the preprocessor pads, so the final iMCU row always fills up.)
+     */
+    if (mainp->rowgroup_ctr != DCTSIZE)
+      return;
+
+    /* Pass the full iMCU row on to the coefficient controller */
+    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+      /* The row was not fully consumed, so we have to suspend and return.
+       * Report one input row fewer than we really took: were this the
+       * last row of the image, the application would otherwise believe
+       * compression had finished.  The flag makes sure this happens only
+       * once per suspended row.
+       */
+      if (! mainp->suspended) {
+        (*in_row_ctr)--;
+        mainp->suspended = TRUE;
+      }
+      return;
+    }
+    /* Row completed.  If an earlier call suspended on it, give back the
+     * input row that was withheld then.
+     */
+    if (mainp->suspended) {
+      (*in_row_ctr)++;
+      mainp->suspended = FALSE;
+    }
+    /* Buffer is empty again; the loop header advances to the next iMCU row */
+    mainp->rowgroup_ctr = 0;
+  }
+}
+
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+
+/*
+ * Process some data.
+ * This routine handles all of the modes that use a full-size buffer.
+ */
+
+METHODDEF(void)
+process_data_buffer_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
+  jpeg_component_info *comp;
+  int idx;
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Start of an iMCU row: re-access each component's virtual array strip. */
+    if (mainp->rowgroup_ctr == 0) {
+      for (idx = 0; idx < cinfo->num_components; idx++) {
+        comp = cinfo->comp_info + idx;
+        mainp->buffer[idx] = (*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr) cinfo, mainp->whole_image[idx],
+           mainp->cur_iMCU_row * (comp->v_samp_factor * DCTSIZE),
+           (JDIMENSION) (comp->v_samp_factor * DCTSIZE), writing);
+      }
+      /* Nothing is read in a CRANK_DEST pass; act as if a full row came in. */
+      if (! writing) {
+        *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
+        mainp->rowgroup_ctr = DCTSIZE;
+      }
+    }
+
+    /* Write passes pull input until this iMCU row is complete. */
+    /* (The preprocessor pads the last iMCU row when necessary.) */
+    if (writing) {
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+      /* Short of a full row: the application must supply more input. */
+      if (mainp->rowgroup_ctr < DCTSIZE)
+        return;
+    }
+
+    /* Every mode except SAVE_SOURCE forwards the row to the compressor. */
+    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+        /* The row was not fully consumed, so we have to suspend and return.
+         * Report one input row fewer than we really took: were this the
+         * last row of the image, the application would otherwise believe
+         * compression had finished.  The flag makes sure this happens only
+         * once per suspended row.
+         */
+        if (! mainp->suspended) {
+          (*in_row_ctr)--;
+          mainp->suspended = TRUE;
+        }
+        return;
+      }
+      /* Row completed.  If an earlier call suspended on it, give back the
+       * input row that was withheld then.
+       */
+      if (mainp->suspended) {
+        (*in_row_ctr)++;
+        mainp->suspended = FALSE;
+      }
+    }
+
+    /* This iMCU row is finished: empty the buffer and step to the next row. */
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  jpeg_component_info *comp;
+  my_main_ptr mainp;
+  int idx;
+
+  /* Allocate the controller from JPOOL_IMAGE and publish it in cinfo. */
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  mainp->pub.start_pass = start_pass_main;
+  cinfo->main = (struct jpeg_c_main_controller *) mainp;
+
+  if (cinfo->raw_data_in)	/* raw-data mode needs no buffer at all */
+    return;
+
+  /* The buffer holds downsampled data, so every component gets its own
+   * allocation, possibly of a different size.
+   */
+  if (! need_full_buffer) {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    mainp->whole_image[0] = NULL; /* flag for no virtual arrays */
+#endif
+    /* One strip buffer per component */
+    for (idx = 0; idx < cinfo->num_components; idx++) {
+      comp = cinfo->comp_info + idx;
+      mainp->buffer[idx] = (*cinfo->mem->alloc_sarray)
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         comp->width_in_blocks * DCTSIZE,
+         (JDIMENSION) (comp->v_samp_factor * DCTSIZE));
+    }
+  } else {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    /* One full-image virtual array per component; the bottom is padded */
+    /* to a multiple of the iMCU height */
+    for (idx = 0; idx < cinfo->num_components; idx++) {
+      comp = cinfo->comp_info + idx;
+      mainp->whole_image[idx] = (*cinfo->mem->request_virt_sarray)
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         comp->width_in_blocks * DCTSIZE,
+         (JDIMENSION) jround_up((long) comp->height_in_blocks,
+                                (long) comp->v_samp_factor) * DCTSIZE,
+         (JDIMENSION) (comp->v_samp_factor * DCTSIZE));
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  }
+}
diff --git a/JPEG/jcmarker.cpp b/JPEG/jcmarker.cpp
new file mode 100644
index 0000000..3d1e6c6
--- /dev/null
+++ b/JPEG/jcmarker.cpp
@@ -0,0 +1,664 @@
+/*
+ * jcmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes (second byte, after 0xFF) */
+  M_SOF0  = 0xc0,		/* written for baseline files */
+  M_SOF1  = 0xc1,		/* written for non-baseline Huffman files */
+  M_SOF2  = 0xc2,		/* written for progressive Huffman files */
+  M_SOF3  = 0xc3,
+  
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,		/* written when arithmetic coding is selected */
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  
+  M_DHT   = 0xc4,		/* Huffman table definition */
+  
+  M_DAC   = 0xcc,		/* arithmetic-coding conditioning info */
+  
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  
+  M_SOI   = 0xd8,		/* first marker of a datastream */
+  M_EOI   = 0xd9,		/* datastream trailer */
+  M_SOS   = 0xda,		/* scan header; compressed data follows it */
+  M_DQT   = 0xdb,		/* quantization table definition */
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,		/* restart interval definition */
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  
+  M_APP0  = 0xe0,		/* carries the JFIF header */
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,		/* carries the Adobe marker */
+  M_APP15 = 0xef,
+  
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  
+  M_TEM   = 0x01,
+  
+  M_ERROR = 0x100		/* does not fit in one byte; not written by this file */
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields; first member, so cinfo->marker can be cast to my_marker_ptr */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer * my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte (j_compress_ptr cinfo, int val)
+/* Append one byte to the destination buffer, flushing it when it fills up */
+{
+  struct jpeg_destination_mgr * out = cinfo->dest;
+
+  *out->next_output_byte = (JOCTET) val;
+  out->next_output_byte++;
+  out->free_in_buffer--;
+  if (out->free_in_buffer == 0 && ! (*out->empty_output_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);	/* marker writing cannot suspend */
+}
+
+
+LOCAL(void)
+emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker code: a 0xFF byte followed by the code byte itself */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int) mark);
+}
+
+
+LOCAL(void)
+emit_2bytes (j_compress_ptr cinfo, int value)
+/* Emit the low 16 bits of value; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);	/* high-order byte */
+  emit_byte(cinfo, value & 0xFF);		/* low-order byte */
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt (j_compress_ptr cinfo, int index)
+/* Write quantization table `index` as a DQT marker unless it was already sent */
+/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
+{
+  JQUANT_TBL * table = cinfo->quant_tbl_ptrs[index];
+  int wide = 0;			/* set to 1 once a value above 255 is seen */
+  int k;
+
+  if (table == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  /* The precision is computed even when the table is not written again. */
+  for (k = 0; k < DCTSIZE2; k++) {
+    if (table->quantval[k] > 255)
+      wide = 1;
+  }
+
+  if (table->sent_table)
+    return wide;		/* duplicate table: nothing to write */
+
+  emit_marker(cinfo, M_DQT);
+  /* Length: 2 length bytes + 1 precision/index byte + 1 or 2 bytes per entry */
+  emit_2bytes(cinfo, wide ? DCTSIZE2*2 + 1 + 2 : DCTSIZE2 + 1 + 2);
+  emit_byte(cinfo, index + (wide<<4));
+
+  for (k = 0; k < DCTSIZE2; k++) {
+    /* Entries go out in zigzag order, hence the jpeg_natural_order[] lookup. */
+    unsigned int entry = table->quantval[jpeg_natural_order[k]];
+    if (wide)
+      emit_byte(cinfo, (int) (entry >> 8));
+    emit_byte(cinfo, (int) (entry & 0xFF));
+  }
+
+  table->sent_table = TRUE;
+
+  return wide;
+}
+
+
+LOCAL(void)
+emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
+/* Emit a DHT marker for DC or AC Huffman table `index`, unless already sent */
+{
+  JHUFF_TBL * htbl;
+  int length, i;
+  
+  if (is_ac) {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;		/* output index has AC bit set */
+  } else {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index); /* index includes the AC bit here */
+  
+  if (! htbl->sent_table) {
+    emit_marker(cinfo, M_DHT);
+    /* Number of huffval[] entries to write = sum of the counts bits[1..16] */
+    length = 0;
+    for (i = 1; i <= 16; i++)
+      length += htbl->bits[i];
+    /* Segment length: 2 length bytes + 1 index byte + 16 count bytes + values */
+    emit_2bytes(cinfo, length + 2 + 1 + 16);
+    emit_byte(cinfo, index);
+    
+    for (i = 1; i <= 16; i++)
+      emit_byte(cinfo, htbl->bits[i]);
+    
+    for (i = 0; i < length; i++)
+      emit_byte(cinfo, htbl->huffval[i]);
+    
+    htbl->sent_table = TRUE;	/* suppress re-emission of this table */
+  }
+}
+
+
+LOCAL(void)
+emit_dac (j_compress_ptr cinfo)
+/* Emit a DAC marker (does nothing unless C_ARITH_CODING_SUPPORTED is defined) */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];	/* 1 if a scan component uses DC table i */
+  char ac_in_use[NUM_ARITH_TBLS];	/* 1 if a scan component uses AC table i */
+  int length, i;
+  jpeg_component_info *compptr;
+  
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    dc_in_use[compptr->dc_tbl_no] = 1;
+    ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+  
+  length = 0;			/* counts the tables that are in use */
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+  
+  emit_marker(cinfo, M_DAC);
+  
+  emit_2bytes(cinfo, length*2 + 2); /* 2 bytes per table plus the length field */
+  
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    if (dc_in_use[i]) {
+      emit_byte(cinfo, i);	/* DC table number */
+      emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+    }
+    if (ac_in_use[i]) {
+      emit_byte(cinfo, i + 0x10);	/* AC table number, 0x10 bit set */
+      emit_byte(cinfo, cinfo->arith_ac_K[i]);
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri (j_compress_ptr cinfo)
+/* Emit a DRI marker carrying cinfo->restart_interval */
+{
+  emit_marker(cinfo, M_DRI);
+  
+  emit_2bytes(cinfo, 4);	/* fixed length: 2 length bytes + 2 value bytes */
+
+  emit_2bytes(cinfo, (int) cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
+/* Emit a SOF marker; `code` selects which SOFn marker is written */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, code);
+  /* 3 bytes per component + 2 (length) + 5 (precision, height, width) + 1 (count) */
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Make sure image isn't bigger than SOF field can handle */
+  if ((long) cinfo->image_height > 65535L ||
+      (long) cinfo->image_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int) cinfo->image_height);
+  emit_2bytes(cinfo, (int) cinfo->image_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos (j_compress_ptr cinfo)
+/* Emit a SOS marker describing the current scan (cinfo->cur_comp_info[]) */
+{
+  int i, td, ta;		/* td, ta: DC and AC table numbers to write */
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, M_SOS);
+  /* 2 bytes per component + 2 (length) + 1 (count) + 3 (Ss, Se, Ah/Al) */
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, cinfo->comps_in_scan);
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    emit_byte(cinfo, compptr->component_id);
+    td = compptr->dc_tbl_no;
+    ta = compptr->ac_tbl_no;
+    if (cinfo->progressive_mode) {
+      /* Progressive mode: only DC or only AC tables are used in one scan;
+       * furthermore, Huffman coding of DC refinement uses no table at all.
+       * We emit 0 for unused field(s); this is recommended by the P&M text
+       * but does not seem to be specified in the standard.
+       */
+      if (cinfo->Ss == 0) {
+	ta = 0;			/* DC scan */
+	if (cinfo->Ah != 0 && !cinfo->arith_code)
+	  td = 0;		/* no DC table either */
+      } else {
+	td = 0;			/* AC scan */
+      }
+    }
+    emit_byte(cinfo, (td << 4) + ta);	/* DC table in high nibble, AC in low */
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_jfif_app0 (j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker; no thumbnail is ever included */
+{
+  /*
+   * Length of APP0 block	(2 bytes)
+   * Block ID			(4 bytes - ASCII "JFIF")
+   * Zero byte			(1 byte to terminate the ID string)
+   * Version Major, Minor	(2 bytes - major first)
+   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu			(2 bytes - dots per unit horizontal)
+   * Ydpu			(2 bytes - dots per unit vertical)
+   * Thumbnail X size		(1 byte)
+   * Thumbnail Y size		(1 byte)
+   */
+  
+  emit_marker(cinfo, M_APP0);
+  /* The sum below follows the field list above, term for term. */
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
+
+  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);		/* terminating zero of the identifier */
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int) cinfo->X_density);
+  emit_2bytes(cinfo, (int) cinfo->Y_density);
+  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);
+}
+
+
+LOCAL(void)
+emit_adobe_app14 (j_compress_ptr cinfo)
+/* Write an Adobe APP14 marker segment */
+{
+  /*
+   * Segment contents, in order:
+   *   length of APP14 block	(2 bytes)
+   *   block ID			(5 bytes - ASCII "Adobe")
+   *   version number		(2 bytes - currently 100)
+   *   flags0			(2 bytes - currently 0)
+   *   flags1			(2 bytes - currently 0)
+   *   color transform		(1 byte)
+   *
+   * Adobe TN 5116 mentions Version = 101, but the Adobe files in
+   * circulation seem to use Version = 100, so 100 is what gets written.
+   *
+   * The color transform byte is 1 when the JPEG color space is YCbCr,
+   * 2 when it is YCCK, and 0 otherwise.  (Adobe defines it in terms of
+   * whether the encoder performed a transformation.)
+   */
+  static const int adobe_id[5] = { 0x41, 0x64, 0x6F, 0x62, 0x65 }; /* "Adobe" */
+  int transform;
+  int k;
+
+  /* Pick the transform code up front so the writes below are straight-line */
+  if (cinfo->jpeg_color_space == JCS_YCbCr)
+    transform = 1;
+  else if (cinfo->jpeg_color_space == JCS_YCCK)
+    transform = 2;
+  else
+    transform = 0;
+
+  emit_marker(cinfo, M_APP14);
+
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  for (k = 0; k < 5; k++)
+    emit_byte(cinfo, adobe_id[k]);	/* Identifier */
+
+  emit_2bytes(cinfo, 100);		/* Version */
+  emit_2bytes(cinfo, 0);		/* Flags0 */
+  emit_2bytes(cinfo, 0);		/* Flags1 */
+  emit_byte(cinfo, transform);		/* Color transform */
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker header; datalen counts the parameter bytes to follow */
+{
+  if (datalen > (unsigned int) 65533)		/* datalen + 2 must fit in 16 bits */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER) marker);
+
+  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length incl. length field */
+}
+
+METHODDEF(void)
+write_marker_byte (j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header; no count check */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker should NOT
+ * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  emit_marker(cinfo, M_SOI);	/* first the SOI */
+
+  /* SOI is defined to reset restart interval to 0, so forget any earlier DRI */
+  marker->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header (j_compress_ptr cinfo)
+{
+  int ci, prec;			/* prec accumulates emit_dqt() results */
+  boolean is_baseline;		/* chooses between SOF0 and SOF1 below */
+  jpeg_component_info *compptr;
+  
+  /* Emit DQT for each quantization table.
+   * Note that emit_dqt() suppresses any duplicate tables.
+   */
+  prec = 0;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  }
+  /* now prec is nonzero iff there are any 16-bit quant tables. */
+
+  /* Check for a non-baseline specification.
+   * Note we assume that Huffman table numbers won't be changed later.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->data_precision != 8) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+	is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* If it's baseline except for quantizer size, warn the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Emit the proper SOF marker; the arithmetic-coding test takes priority */
+  if (cinfo->arith_code) {
+    emit_sof(cinfo, M_SOF9);	/* SOF code for arithmetic coding */
+  } else {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+    else if (is_baseline)
+      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+    else
+      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+  }
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  int i;
+  jpeg_component_info *compptr;
+
+  if (cinfo->arith_code) {
+    /* Emit arith conditioning info.  We may have some duplication
+     * if the file has multiple scans, but it's so small it's hardly
+     * worth worrying about.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Emit Huffman tables.
+     * Note that emit_dht() suppresses any duplicate tables.
+     */
+    for (i = 0; i < cinfo->comps_in_scan; i++) {
+      compptr = cinfo->cur_comp_info[i];
+      if (cinfo->progressive_mode) {
+	/* Progressive mode: only DC or only AC tables are used in one scan */
+	if (cinfo->Ss == 0) {
+	  if (cinfo->Ah == 0)	/* DC needs no table for refinement scan */
+	    emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+	} else {
+	  emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+	}
+      } else {
+	/* Sequential mode: need both DC and AC tables */
+	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+      }
+    }
+  }
+
+  /* Emit DRI if required --- note that DRI value could change for each scan.
+   * We avoid wasting space with unnecessary DRIs, however.
+   */
+  if (cinfo->restart_interval != marker->last_restart_interval) {
+    emit_dri(cinfo);
+    marker->last_restart_interval = cinfo->restart_interval; /* remember what was sent */
+  }
+
+  emit_sos(cinfo);		/* SOS goes last, after any tables and DRI */
+}
+
+
+/*
+ * Write datastream trailer.
+ */
+
+METHODDEF(void)
+write_file_trailer (j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);	/* the trailer consists of the EOI marker only */
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only (j_compress_ptr cinfo)
+{
+  int i;			/* table slot number */
+
+  emit_marker(cinfo, M_SOI);
+  /* Every defined quantization table; emit_dqt skips ones already sent */
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if (cinfo->quant_tbl_ptrs[i] != NULL)
+      (void) emit_dqt(cinfo, i);	/* precision result is not needed here */
+  }
+  /* Huffman tables are written only when arithmetic coding is not selected */
+  if (! cinfo->arith_code) {
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker;
+
+  /* Create the subobject in the JPOOL_IMAGE pool and attach it to cinfo */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_marker_writer));
+  cinfo->marker = (struct jpeg_marker_writer *) marker;
+  /* Initialize method pointers to the routines defined in this file */
+  marker->pub.write_file_header = write_file_header;
+  marker->pub.write_frame_header = write_frame_header;
+  marker->pub.write_scan_header = write_scan_header;
+  marker->pub.write_file_trailer = write_file_trailer;
+  marker->pub.write_tables_only = write_tables_only;
+  marker->pub.write_marker_header = write_marker_header;
+  marker->pub.write_marker_byte = write_marker_byte;
+  /* Initialize private state: no DRI has been emitted yet */
+  marker->last_restart_interval = 0;
+}
diff --git a/JPEG/jcmaster.cpp b/JPEG/jcmaster.cpp
new file mode 100644
index 0000000..aab4020
--- /dev/null
+++ b/JPEG/jcmaster.cpp
@@ -0,0 +1,590 @@
+/*
+ * jcmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work 
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef enum {			/* kind of pass; selects the case in prepare_for_pass */
+	main_pass,		/* input data, also do first output step */
+	huff_opt_pass,		/* Huffman code optimization pass */
+	output_pass		/* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;	/* public fields; first member, so cinfo->master can be cast to my_master_ptr */
+
+  c_pass_type pass_type;	/* the type of the current pass */
+
+  int pass_number;		/* # of passes completed */
+  int total_passes;		/* total # of passes needed */
+
+  int scan_number;		/* current index in scan_info[] */
+} my_comp_master;
+
+typedef my_comp_master * my_master_ptr;
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+LOCAL(void)
+initial_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase: */
+/* validate the image parameters, then fill in per-component dimensions */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  long samplesperrow;		/* image_width * input_components, as long */
+  JDIMENSION jd_samplesperrow;	/* same value narrowed, to detect overflow */
+  /* Sanity check on image dimensions and component counts */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0 || cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)	/* round trip lost bits */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components (needs the maxima from the loop above) */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* For compression, we never do DCT scaling. */
+    compptr->DCT_scaled_size = DCTSIZE;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed (this flag isn't actually used for compression) */
+    compptr->component_needed = TRUE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(void)
+validate_script (j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info * scanptr;
+  int scanno, ncomps, ci, coefi, thisi;	/* scanno is 1-based for error reports */
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS]; /* initialized and used in sequential mode only */
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int * last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   * The first scan alone therefore decides which mode the script is in.
+   */
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    last_bitpos_ptr = & last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+	*last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
+       * seems wrong: the upper bound ought to depend on data precision.
+       * Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+#if BITS_IN_JSAMPLE == 8
+#define MAX_AH_AL 10
+#else
+#define MAX_AH_AL 13
+#endif
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+	if (Se != 0)		/* DC and AC together not OK */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+	if (ncomps != 1)	/* AC scans must be for only one component */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	for (coefi = Ss; coefi <= Se; coefi++) {
+	  if (last_bitpos_ptr[coefi] < 0) {
+	    /* first scan of this coefficient */
+	    if (Ah != 0)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  } else {
+	    /* not first scan: Ah must equal the previous Al, and Al must be Ah-1 */
+	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  }
+	  last_bitpos_ptr[coefi] = Al;
+	}
+      }
+#endif
+    } else {
+      /* For sequential JPEG, all progression parameters must be these: */
+      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+	thisi = scanptr->component_index[ci];
+	if (component_sent[thisi])
+	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+	component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (! component_sent[ci])
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+LOCAL(void)
+select_scan_parameters (j_compress_ptr cinfo)
+/* Set up the scan parameters for the current scan */
+{
+  int ci;
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr) cinfo->master;
+    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+	&cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    cinfo->Ss = scanptr->Ss;
+    cinfo->Se = scanptr->Se;
+    cinfo->Ah = scanptr->Ah;
+    cinfo->Al = scanptr->Al;
+  }
+  else	/* no script; this else binds to the block after the #endif */
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+    cinfo->Ss = 0;		/* full coefficient range, no successive approximation */
+    cinfo->Se = DCTSIZE2-1;
+    cinfo->Ah = 0;
+    cinfo->Al = 0;
+  }
+}
+
+
+LOCAL(void)
+per_scan_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs: here one MCU is one block of the component */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = DCTSIZE;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+    
+    cinfo->blocks_in_MCU = 0;	/* running total, built up in the loop below */
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; /* block belongs to scan component ci */
+      }
+    }
+    
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: collects the input data and, depending on
+     * optimize_coding, either optimizes Huffman tables or outputs scan 0.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (! cinfo->raw_data_in) {	/* these three start only for non-raw input */
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+				(master->total_passes > 1 ?
+				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No data is output in this pass, so no pass_startup call is wanted */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Frame/scan headers get written at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Remaining case (Ss == 0, Ah != 0, not arithmetic): a Huffman DC
+     * refinement scan needs no table, so this turns into its output pass.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;
+    /*FALLTHROUGH*/
+#endif
+  case output_pass:
+    /* Do a data-output pass. */
+    /* With optimize_coding an earlier pass already did the per-scan setup. */
+    if (! cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* Frame header only ahead of scan 0; a scan header ahead of every scan */
+    if (master->scan_number == 0)
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:			/* pass type with no case compiled in above */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
+
+  /* Report pass progress to the progress monitor, if one is installed */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * In multi-pass processing, this routine is not used.
+ */
+
+METHODDEF(void)
+pass_startup (j_compress_ptr cinfo)
+{
+  /* One-shot hook: clear the request flag, then emit both headers. */
+  cinfo->master->call_pass_startup = FALSE;
+  cinfo->marker->write_frame_header(cinfo);
+  cinfo->marker->write_scan_header(cinfo);
+}
+
+
+/*
+ * Finish up at end of pass.
+ */
+
+METHODDEF(void)
+finish_pass_master (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Every pass ends with an entropy-coder finish_pass call, either to
+   * analyze statistics or to flush its output buffer.
+   */
+  cinfo->entropy->finish_pass(cinfo);
+
+  /* Advance the pass/scan state for the next pass; pass types other than
+   * the three below leave it untouched.
+   */
+  if (master->pass_type == main_pass) {
+    /* Without optimization the next pass outputs scan 1; with it, the
+     * next pass outputs scan 0, so the scan number stays put.
+     */
+    if (! cinfo->optimize_coding)
+      master->scan_number++;
+    master->pass_type = output_pass;
+  } else if (master->pass_type == huff_opt_pass) {
+    /* An optimization pass is always followed by output of its scan */
+    master->pass_type = output_pass;
+  } else if (master->pass_type == output_pass) {
+    /* On to the next scan, which starts with an optimization pass when
+     * optimizing and with another output pass otherwise.
+     */
+    master->scan_number++;
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control.
+ */
+
+GLOBAL(void)
+jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master;
+
+  /* Allocate the master object in the image pool and hook up its methods. */
+  master = (my_master_ptr)
+    cinfo->mem->alloc_small((j_common_ptr) cinfo, JPOOL_IMAGE,
+			    SIZEOF(my_comp_master));
+  cinfo->master = (struct jpeg_comp_master *) master;
+  master->pub.is_last_pass = FALSE;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.pass_startup = pass_startup;
+  master->pub.prepare_for_pass = prepare_for_pass;
+
+  /* Check the parameters and compute derived values. */
+  initial_setup(cinfo);
+
+  if (cinfo->scan_info == NULL) {
+    /* No scan script: one scan, not progressive. */
+    cinfo->progressive_mode = FALSE;
+    cinfo->num_scans = 1;
+  } else {
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    validate_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
+
+  /* TEMPORARY HACK ???: progressive mode forces optimization on, on the
+   * assumption that default tables are no good for progressive mode.
+   */
+  if (cinfo->progressive_mode)
+    cinfo->optimize_coding = TRUE;
+
+  /* Set up private state.  Normal compression always starts with the main
+   * pass; transcoding has no main pass, so it starts with an optimization
+   * pass if optimizing and with an output pass otherwise.
+   */
+  if (! transcode_only)
+    master->pass_type = main_pass;
+  else
+    master->pass_type = cinfo->optimize_coding ? huff_opt_pass : output_pass;
+  master->pass_number = 0;
+  master->scan_number = 0;
+  master->total_passes =
+    cinfo->optimize_coding ? cinfo->num_scans * 2 : cinfo->num_scans;
+}
diff --git a/JPEG/jcomapi.cpp b/JPEG/jcomapi.cpp
new file mode 100644
index 0000000..9b1fa75
--- /dev/null
+++ b/JPEG/jcomapi.cpp
@@ -0,0 +1,106 @@
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Nothing to do for a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Free every pool above the permanent one, highest-numbered first
+   * (reverse order might help avoid fragmentation with some mallocs).
+   */
+  pool = JPOOL_NUMPOOLS;
+  while (--pool > JPOOL_PERMANENT)
+    cinfo->mem->free_pool(cinfo, pool);
+
+  /* Put the object back in its start state so that it can be reused. */
+  if (! cinfo->is_decompressor) {
+    cinfo->global_state = CSTATE_START;
+  } else {
+    cinfo->global_state = DSTATE_START;
+    /* Clear the pointer to the now-deleted marker list so that the
+     * application cannot follow it; kludgy, but this is the central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* mem is NULL if the manager never initialized or was already destroyed. */
+  if (cinfo->mem != NULL) {
+    cinfo->mem->self_destruct(cinfo);	/* releases everything */
+    cinfo->mem = NULL;			/* makes a second call harmless */
+  }
+  cinfo->global_state = 0;		/* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  JQUANT_TBL *new_tbl = (JQUANT_TBL *)
+    cinfo->mem->alloc_small(cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+
+  /* A table from the permanent pool must start out marked as not sent. */
+  new_tbl->sent_table = FALSE;
+  return new_tbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  JHUFF_TBL *new_tbl = (JHUFF_TBL *)
+    cinfo->mem->alloc_small(cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+
+  /* A table from the permanent pool must start out marked as not sent. */
+  new_tbl->sent_table = FALSE;
+  return new_tbl;
+}
diff --git a/JPEG/jconfig.h b/JPEG/jconfig.h
new file mode 100644
index 0000000..6696272
--- /dev/null
+++ b/JPEG/jconfig.h
@@ -0,0 +1,37 @@
+/* jconfig.h --- generated by ckconfig.c */
+/* see jconfig.doc for explanations of the individual symbols */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+#ifdef JPEG_INTERNALS		/* section seen only if JPEG_INTERNALS is defined */
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#ifdef JPEG_CJPEG_DJPEG	/* section seen only if JPEG_CJPEG_DJPEG is defined */
+
+#define BMP_SUPPORTED		/* BMP image file format */
+#define GIF_SUPPORTED		/* GIF image file format */
+#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED		/* Utah RLE image file format */
+#define TARGA_SUPPORTED		/* Targa image file format */
+
+#undef TWO_FILE_COMMANDLINE	/* You may need this on non-Unix systems */
+#undef NEED_SIGNAL_CATCHER	/* Define this if you use jmemname.c */
+#undef DONT_USE_B_MODE
+/* #define PROGRESS_REPORT */	/* optional; left disabled here */
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/JPEG/jcparam.cpp b/JPEG/jcparam.cpp
new file mode 100644
index 0000000..6fc48f5
--- /dev/null
+++ b/JPEG/jcparam.cpp
@@ -0,0 +1,610 @@
+/*
+ * jcparam.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+		      const unsigned int *basic_table,
+		      int scale_factor, boolean force_baseline)
+/* Install quantization table number which_tbl, computed as basic_table
+ * times scale_factor, the latter being a percentage.
+ * Entries are limited to 1..32767, or to 1..255 when force_baseline is
+ * TRUE, for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL *tbl;
+  long upper_limit, scaled;
+  int k;
+
+  /* Tables may only be changed before start_compress has been called. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  tbl = cinfo->quant_tbl_ptrs[which_tbl];
+  if (tbl == NULL) {
+    tbl = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    cinfo->quant_tbl_ptrs[which_tbl] = tbl;
+  }
+
+  /* 32767 is the max quantizer needed for 12 bits; baseline allows 255. */
+  upper_limit = force_baseline ? 255L : 32767L;
+  for (k = 0; k < DCTSIZE2; k++) {
+    scaled = ((long) basic_table[k] * scale_factor + 50L) / 100L;
+    if (scaled <= 0L) scaled = 1L;
+    if (scaled > upper_limit) scaled = upper_limit;
+    tbl->quantval[k] = (UINT16) scaled;
+  }
+
+  /* Clear sent_table so that the table will be written to the JPEG file. */
+  tbl->sent_table = FALSE;
+}
+
+
+GLOBAL(void)
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+			 boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting by scaling the default
+ * tables with a straight percentage.  jpeg_set_quality (below) is better in
+ * most cases; this entry point exists for applications that insist on a
+ * linear percentage scaling.
+ */
+{
+  /* Sample quantization tables from JPEG spec section K.1.  According to
+   * the spec these values produce "good" quality, and "very good" quality
+   * when divided by 2.
+   */
+  static const unsigned int base_luminance_tbl[DCTSIZE2] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+  };
+  static const unsigned int base_chrominance_tbl[DCTSIZE2] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+  };
+
+  /* Table 0 is scaled from the luminance values, table 1 from chrominance */
+  jpeg_add_quant_table(cinfo, 0, base_luminance_tbl,
+		       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, base_chrominance_tbl,
+		       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling (int quality)
+/* Map a user quality rating, nominally 0 (terrible) to 100 (very good),
+ * to the percentage scaling factor for the underlying quantization table
+ * along our recommended scaling curve.
+ */
+{
+  int q = quality;
+
+  /* Clamp the rating to 1..100; excluding 0 avoids a zero divide below. */
+  if (q > 100)
+    q = 100;
+  else if (q <= 0)
+    q = 1;
+
+  /* Below 50 the scaling is 5000/Q.  From 50 upward it is 200 - 2*Q, so
+   * Q=50 uses the basic table as-is (scaling 100) and Q=100 gives scaling
+   * 0, which makes jpeg_add_quant_table set every table entry to 1
+   * (hence, minimum quantization loss).
+   */
+  if (q < 50)
+    return 5000 / q;
+  return 200 - q * 2;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Standard entry point for choosing the 'quality' (quantization) setting
+ * with the default tables; it is what typical user interfaces call.
+ * Callers needing detailed control over the quantization tables can use
+ * the three preceding routines directly.
+ */
+{
+  /* The user's 0-100 rating is first turned into a percentage scaling,
+   * which then scales the standard tables.
+   */
+  int scale_percent = jpeg_quality_scaling(quality);
+  jpeg_set_linear_quality(cinfo, scale_percent, force_baseline);
+}
+
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_compress_ptr cinfo,
+		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Fill in the Huffman table *htblptr, allocating it on first use */
+{
+  JHUFF_TBL *tbl = *htblptr;
+  int total = 0, codelen;
+
+  if (tbl == NULL) {
+    tbl = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+    *htblptr = tbl;
+  }
+
+  /* bits[] holds the number of symbols of each code length. */
+  MEMCOPY(tbl->bits, bits, SIZEOF(tbl->bits));
+
+  /* Total the counts, mainly so that the right number of symbols is copied
+   * from val[] without marching off the end of memory; jchuff.c will do a
+   * more thorough test later. */
+  for (codelen = 16; codelen >= 1; codelen--)
+    total += bits[codelen];
+  if (total < 1 || total > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+  MEMCOPY(tbl->huffval, val, total * SIZEOF(UINT8));
+
+  /* Clear sent_table so that the table will be written to the JPEG file. */
+  tbl->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables (j_compress_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3): the
+ * luminance pair in slot 0, the chrominance pair in slot 1.
+ * IMPORTANT: these are only valid for 8-bit data precision! */
+{
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
+		 bits_dc_luminance, val_dc_luminance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
+		 bits_ac_luminance, val_ac_luminance);
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
+		 bits_dc_chrominance, val_dc_chrominance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
+		 bits_ac_chrominance, val_ac_chrominance);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults (j_compress_ptr cinfo)
+{
+  int tblno;
+
+  /* Defaults may only be installed before start_compress is called. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* The comp_info array is sized for the maximum component count and
+   * placed in the permanent pool, in case the application compresses
+   * several images with the same parameter settings.
+   */
+  if (cinfo->comp_info == NULL) {
+    cinfo->comp_info = (jpeg_component_info *)
+      cinfo->mem->alloc_small((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			      MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+  }
+
+  /* ---- Settings that do not depend on the color space ---- */
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  /* Two quantization tables at the default quality of 75 ... */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* ... and the standard Huffman tables. */
+  std_huff_tables(cinfo);
+
+  /* Default arithmetic coding conditioning */
+  for (tblno = 0; tblno < NUM_ARITH_TBLS; tblno++) {
+    cinfo->arith_ac_K[tblno] = 5;
+    cinfo->arith_dc_U[tblno] = 1;
+    cinfo->arith_dc_L[tblno] = 0;
+  }
+
+  /* No scan script, i.e. no multiple-scan output */
+  cinfo->num_scans = 0;
+  cinfo->scan_info = NULL;
+
+  /* Input is a normal source image rather than raw downsampled data;
+   * entropy coding is Huffman rather than arithmetic.
+   */
+  cinfo->raw_data_in = FALSE;
+  cinfo->arith_code = FALSE;
+
+  /* Extra passes to optimize entropy coding are off by default.  The
+   * standard Huffman tables are valid for 8-bit data precision only,
+   * though, so a higher precision forces optimization on in order to get
+   * usable tables.  The test can go away once default tables valid for the
+   * desired precision are supplied.
+   */
+  if (cinfo->data_precision > 8)
+    cinfo->optimize_coding = TRUE;
+  else
+    cinfo->optimize_coding = FALSE;
+
+  /* Simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* Default DCT algorithm */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers */
+  cinfo->restart_in_rows = 0;
+  cinfo->restart_interval = 0;
+
+  /* Default JFIF marker parameters.  jpeg_set_colorspace decides whether
+   * the marker is actually written.
+   *
+   * The version code is 1.01; an application that wants to emit JFIF 1.02
+   * extension markers should set JFIF_minor_version to 2.  1.01 is kept as
+   * the default because some decoders still in use may complain about
+   * 1.02.
+   */
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;	/* pixel size unknown */
+  cinfo->X_density = 1;		/* square pixel aspect ratio */
+  cinfo->Y_density = 1;
+
+  /* Pick the JPEG colorspace, and its defaults, from the input space. */
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace (j_compress_ptr cinfo)
+{
+  J_COLOR_SPACE target;
+
+  /* Map the input color space to the default JPEG color space. */
+  switch (cinfo->in_color_space) {
+  case JCS_RGB:
+  case JCS_YCbCr:
+    /* RGB and YCbCr input both default to a YCbCr file. */
+    target = JCS_YCbCr;
+    break;
+  case JCS_GRAYSCALE:
+  case JCS_CMYK:
+  case JCS_YCCK:
+  case JCS_UNKNOWN:
+    /* By default, no translation: the input space is kept as it is. */
+    target = cinfo->in_color_space;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    return;			/* no colorspace is selected on this path */
+  }
+
+  jpeg_set_colorspace(cinfo, target);
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info * compptr;
+  int ci;
+  /* SET_COMP fills in comp_info[index]; it uses compptr as its temporary. */
+#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Q and Huff tables 0 go to luminance components, tables 1 to chrominance
+   * components; spaces without that split (RGB, CMYK, unknown) use 0 only.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 1, 1,1, 0, 0,0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* Default is 2x2 subsampling of chrominance: Y is 2x2, Cb and Cr 1x1 */
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
+    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(3, 4, 2,2, 0, 0,0);	/* same sampling and tables as component 0 */
+    break;
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);	/* component id is just the index */
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan (jpeg_scan_info * scanptr, int ci,
+	     int Ss, int Se, int Ah, int Al)
+/* Support routine: fill *scanptr with a one-component scan for ci and
+ * return the script entry that follows it */
+{
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  return scanptr + 1;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans (jpeg_scan_info * scanptr, int ncomps,
+	    int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for each component */
+{
+  int ci = 0;
+  /* Identical parameters for every scan; only the component differs. */
+  while (ci < ncomps) {
+    scanptr->Ss = Ss;
+    scanptr->Se = Se;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr->comps_in_scan = 1;
+    scanptr->component_index[0] = ci++;
+    scanptr++;
+  }
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
+/* Support routine: generate interleaved DC scan if possible, else N scans */
+{
+  int ci;
+
+  /* Too many components to interleave: one DC scan per component. */
+  if (ncomps > MAX_COMPS_IN_SCAN)
+    return fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+
+  /* A single interleaved DC scan (Ss = Se = 0) holds every component. */
+  scanptr->comps_in_scan = ncomps;
+  for (ci = 0; ci < ncomps; ci++)
+    scanptr->component_index[ci] = ci;
+  scanptr->Ss = 0;
+  scanptr->Se = 0;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+
+  return scanptr + 1;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression (j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  int ycc_script;
+  int nscans;
+  jpeg_scan_info * next;
+
+  /* The script may only be set up before start_compress is called. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* YCbCr color images get a custom script; all else the all-purpose one. */
+  ycc_script = (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr);
+
+  /* Number of scans in the script.  Must match the code below! */
+  if (ycc_script)
+    nscans = 10;
+  else if (ncomps > MAX_COMPS_IN_SCAN)
+    nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+  else
+    nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+
+  /* The script lives in the permanent pool, in case the application runs
+   * multiple compressions without changing the settings.  Previously
+   * allocated space is re-used when big enough, so that repeated calls for
+   * the same JPEG object do not leak memory, and at least 10 entries are
+   * allocated so that a script first built for grayscale also has room
+   * for YCbCr.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      cinfo->mem->alloc_small((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+  }
+  next = cinfo->script_space;
+  cinfo->num_scans = nscans;
+  cinfo->scan_info = next;
+
+  if (! ycc_script) {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    next = fill_dc_scans(next, ncomps, 0, 1);
+    next = fill_scans(next, ncomps, 1, 5, 0, 2);
+    next = fill_scans(next, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    next = fill_scans(next, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    next = fill_dc_scans(next, ncomps, 1, 0);
+    next = fill_scans(next, ncomps, 1, 63, 1, 0);
+    return;
+  }
+
+  /* Custom script for YCbCr color images. */
+  /* Initial DC scan */
+  next = fill_dc_scans(next, ncomps, 0, 1);
+  /* Initial AC scan: get some luma data out in a hurry */
+  next = fill_a_scan(next, 0, 1, 5, 0, 2);
+  /* Chroma data is too small to be worth expending many scans on */
+  next = fill_a_scan(next, 2, 1, 63, 0, 1);
+  next = fill_a_scan(next, 1, 1, 63, 0, 1);
+  /* Complete spectral selection for luma AC */
+  next = fill_a_scan(next, 0, 6, 63, 0, 2);
+  /* Refine next bit of luma AC */
+  next = fill_a_scan(next, 0, 1, 63, 2, 1);
+  /* Finish DC successive approximation */
+  next = fill_dc_scans(next, ncomps, 1, 0);
+  /* Finish AC: chroma first, luma last since it's usually largest scan */
+  next = fill_a_scan(next, 2, 1, 63, 1, 0);
+  next = fill_a_scan(next, 1, 1, 63, 1, 0);
+  next = fill_a_scan(next, 0, 1, 63, 1, 0);
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/JPEG/jcphuff.cpp b/JPEG/jcphuff.cpp
new file mode 100644
index 0000000..b86f659
--- /dev/null
+++ b/JPEG/jcphuff.cpp
@@ -0,0 +1,833 @@
+/*
+ * jcphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines for progressive JPEG.
+ *
+ * We do not support output suspension in this module, since the library
+ * currently does not allow multiple-scan files to be written with output
+ * suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jchuff.c */
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+/* Private state of the progressive Huffman encoder; cinfo->entropy points here. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  /* Mode flag: TRUE for optimization (count only), FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* Bit-level coding status.
+   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for DC components */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef (point-transformed) per component */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;		/* the table number of the single component */
+  unsigned int EOBRUN;		/* run length of EOBs counted but not yet emitted */
+  unsigned int BE;		/* # of buffered correction bits before MCU */
+  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan).
+   * Since any one scan codes only DC or only AC, we only need one set
+   * of tables, not one for DC and one for AC.
+   */
+  c_derived_tbl * derived_tbls[NUM_HUFF_TBLS];	/* NULL until first built */
+
+  /* Statistics tables for optimization; again, one set is enough */
+  long * count_ptrs[NUM_HUFF_TBLS];	/* 257 counters each, NULL until needed */
+} phuff_entropy_encoder;
+
+typedef phuff_entropy_encoder * phuff_entropy_ptr;	/* cast target for cinfo->entropy */
+
+/* MAX_CORR_BITS is the capacity of the AC refinement correction-bit buffer
+ * (entropy->bit_buffer, one char per bit).  Larger sizes may slightly
+ * improve compression, but 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits: encode_mcu_AC_refine forces the buffer
+ * out once fewer than DCTSIZE2-1 free slots would remain for the next MCU. */
+
+#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.  The emulation ORs ones into bit 16-shft and up,
+ * so it presumably expects x to fit in 16 signed bits (TODO confirm). */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else	/* >> is assumed to sign-extend, so no temporary is needed */
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif /* RIGHT_SHIFT_IS_UNSIGNED */
+
+/* Forward declarations of the methods that start_pass_phuff installs */
+METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo));
+METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo));
+
+
+/* Initialize for a Huffman-compressed scan using progressive JPEG:
+ * select the encode_mcu/finish_pass methods from Ss, Ah and gather_statistics,
+ * set up the tables this scan needs, and reset all coding state. */
+
+METHODDEF(void)
+start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics)
+{  
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  entropy->cinfo = cinfo;
+  entropy->gather_statistics = gather_statistics;
+
+  is_DC_band = (cinfo->Ss == 0);	/* Ss == 0 marks a DC scan */
+
+  /* We assume jcmaster.c already validated the scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->Ah == 0) {		/* Ah == 0: first pass; otherwise refinement */
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_first;
+    else
+      entropy->pub.encode_mcu = encode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_refine;
+    else {
+      entropy->pub.encode_mcu = encode_mcu_AC_refine;
+      /* AC refinement needs a correction bit buffer (allocated only once) */
+      if (entropy->bit_buffer == NULL)
+	entropy->bit_buffer = (char *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      MAX_CORR_BITS * SIZEOF(char));
+    }
+  }
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather_phuff;
+  else
+    entropy->pub.finish_pass = finish_pass_phuff;
+
+  /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
+   * for AC coefficients.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Initialize DC predictions to 0 */
+    entropy->last_dc_val[ci] = 0;
+    /* Get table index */
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;	/* remembered for the AC encoders */
+    }
+    if (gather_statistics) {
+      /* Check for invalid table index */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->count_ptrs[tbl] == NULL)
+	entropy->count_ptrs[tbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long));	/* re-zeroed on every pass */
+    } else {
+      /* Compute derived values for Huffman table */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
+			      & entropy->derived_tbls[tbl]);
+    }
+  }
+
+  /* Initialize AC stuff */
+  entropy->EOBRUN = 0;
+  entropy->BE = 0;
+
+  /* Initialize bit buffer to empty */
+  entropy->put_buffer = 0;
+  entropy->put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte; dump_buffer is called as soon as the buffer becomes full */
+#define emit_byte(entropy,val)  \
+	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(entropy)->free_in_buffer == 0)  \
+	    dump_buffer(entropy); }
+
+
+LOCAL(void)
+dump_buffer (phuff_entropy_ptr entropy)
+/* Flush the full output buffer through the destination manager. */
+{
+  j_compress_ptr cinfo = entropy->cinfo;
+  struct jpeg_destination_mgr * mgr = cinfo->dest;
+  if (! (*mgr->empty_output_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);	/* this module cannot suspend */
+  /* The manager now describes a fresh buffer; refresh our local copies. */
+  entropy->free_in_buffer = mgr->free_in_buffer;
+  entropy->next_output_byte = mgr->next_output_byte;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+//INLINE
+LOCAL(void)
+emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit the low size bits of code; no output in gather mode (size 0 is fatal) */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = entropy->put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;			/* do nothing if we're only getting stats */
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
+
+  while (put_bits >= 8) {	/* write out every complete byte */
+    int c = (int) ((put_buffer >> 16) & 0xFF);	/* top byte of the 24-bit window */
+    
+    emit_byte(entropy, c);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->put_buffer = put_buffer; /* update variables */
+  entropy->put_bits = put_bits;
+}
+
+
+LOCAL(void)
+flush_bits (phuff_entropy_ptr entropy)
+{
+  /* Seven 1-bits are enough to complete any partially filled byte. */
+  emit_bits(entropy, (unsigned int) 0x7F, 7);
+  entropy->put_bits = entropy->put_buffer = 0; /* accumulator is now empty */
+}
+
+
+/*
+ * Emit a Huffman symbol, or merely tally it when gathering statistics.
+ */
+//INLINE
+LOCAL(void)
+emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  c_derived_tbl * codes;
+  if (entropy->gather_statistics) {	/* statistics pass: just count it */
+    entropy->count_ptrs[tbl_no][symbol] += 1;
+    return;
+  }
+  codes = entropy->derived_tbls[tbl_no];	/* output pass: look up the code */
+  emit_bits(entropy, codes->ehufco[symbol], codes->ehufsi[symbol]);
+}
+
+
+/*
+ * Write out nbits correction bits, stored one per char starting at bufstart.
+ */
+
+LOCAL(void)
+emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart,
+		    unsigned int nbits)
+{
+  unsigned int i;
+
+  /* Correction bits take no part in the Huffman statistics, */
+  /* so a gathering pass has nothing to do here. */
+  if (! entropy->gather_statistics) {
+    for (i = 0; i < nbits; i++)
+      emit_bits(entropy, (unsigned int) bufstart[i], 1);
+  }
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol, followed by the buffered correction bits.
+ */
+
+LOCAL(void)
+emit_eobrun (phuff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = 0;
+    while ((temp >>= 1))	/* nbits = position of EOBRUN's highest set bit */
+      nbits++;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);	/* nbits in the high nibble */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, entropy->EOBRUN, nbits);	/* low nbits bits of the run length */
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;
+  }
+}
+
+
+/* Emit a restart marker & resynchronize predictions.  Pending EOBRUN data
+ * is flushed first; the marker itself is written only in output mode.
+ */
+
+LOCAL(void)
+emit_restart (phuff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);		/* no-op unless an EOB run is pending */
+
+  if (! entropy->gather_statistics) {
+    flush_bits(entropy);	/* pad out the partial byte before the marker */
+    emit_byte(entropy, 0xFF);
+    emit_byte(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection, or first
+ * pass of successive approximation).  Always returns TRUE (no suspension).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  int blkn, ci;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  jpeg_component_info * compptr;
+  ISHIFT_TEMPS
+  /* Load local copies of the destination's buffer state */
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];	/* scan component owning this block */
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    temp = temp2 - entropy->last_dc_val[ci];
+    entropy->last_dc_val[ci] = temp2;
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+    
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    while (temp) {
+      nbits++;
+      temp >>= 1;
+    }
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > MAX_COEF_BITS+1)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+    
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_symbol(entropy, compptr->dc_tbl_no, nbits);
+    
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, (unsigned int) temp2, nbits);
+  }
+  /* Store the advanced buffer state back into the destination */
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0-7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection, or first
+ * pass of successive approximation).  Only MCU_data[0] is coded; returns TRUE.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  /* Load local copies of the destination's buffer state */
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+  
+  r = 0;			/* r = run length of zeros */
+   
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = (*block)[jpeg_natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value; so the code is
+     * interwoven with finding the abs value (temp) and output bits (temp2).
+     */
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      temp >>= Al;		/* apply the point transform */
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;		/* apply the point transform */
+      temp2 = temp;
+    }
+    /* Watch out for case that nonzero coef is zero after point transform */
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any pending EOBRUN */
+    if (entropy->EOBRUN > 0)
+      emit_eobrun(entropy);
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 1;			/* there must be at least one 1 bit */
+    while ((temp >>= 1))
+      nbits++;
+    /* Check for out-of-range coefficient values */
+    if (nbits > MAX_COEF_BITS)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits(entropy, (unsigned int) temp2, nbits);
+
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0) {			/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);	/* force it out to avoid overflow */
+  }
+  /* Store the advanced buffer state back into the destination */
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0-7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan: one raw bit
+ * per block, no Huffman symbols.  Note: we assume such scans can be
+ * multi-component, although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  int blkn;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  /* Load local copies of the destination's buffer state */
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    temp = (*block)[0];
+    emit_bits(entropy, (unsigned int) (temp >> Al), 1);	/* emit_bits masks to 1 bit */
+  }
+  /* Store the advanced buffer state back into the destination */
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0-7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  register int r, k;
+  int EOB;
+  char *BR_buffer;
+  unsigned int BR;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  int absvalues[DCTSIZE2];	/* indexed by k; only entries Ss..Se are filled */
+  /* Load local copies of the destination's buffer state */
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* It is convenient to make a pre-pass to determine the transformed
+   * coefficients' absolute values and the EOB position.
+   */
+  EOB = 0;
+  for (k = cinfo->Ss; k <= Se; k++) {
+    temp = (*block)[jpeg_natural_order[k]];
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+    temp >>= Al;		/* apply the point transform */
+    absvalues[k] = temp;	/* save abs value for main pass */
+    if (temp == 1)
+      EOB = k;			/* EOB = index of last newly-nonzero coef */
+  }
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+  
+  r = 0;			/* r = run length of zeros */
+  BR = 0;			/* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = absvalues[k]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any required ZRLs, but not if they can be folded into EOB */
+    while (r > 15 && k <= EOB) {
+      /* emit any pending EOBRUN and the BE correction bits */
+      emit_eobrun(entropy);
+      /* Emit ZRL */
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+      /* Emit buffered correction bits that must be associated with ZRL */
+      emit_buffered_bits(entropy, BR_buffer, BR);
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+      BR = 0;
+    }
+
+    /* If the coef was previously nonzero, it only needs a correction bit.
+     * NOTE: a straight translation of the spec's figure G.7 would suggest
+     * that we also need to test r > 15.  But if r > 15, we can only get here
+     * if k > EOB, which implies that this coefficient is not 1.
+     */
+    if (temp > 1) {
+      /* The correction bit is the next bit of the absolute value. */
+      BR_buffer[BR++] = (char) (temp & 1);
+      continue;
+    }
+
+    /* Emit any pending EOBRUN and the BE correction bits */
+    emit_eobrun(entropy);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
+
+    /* Emit output bit for newly-nonzero coef */
+    temp = ((*block)[jpeg_natural_order[k]] < 0) ? 0 : 1;	/* sign: 0 = negative, 1 = positive */
+    emit_bits(entropy, (unsigned int) temp, 1);
+
+    /* Emit buffered correction bits that must be associated with this code */
+    emit_buffered_bits(entropy, BR_buffer, BR);
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+    BR = 0;
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    entropy->BE += BR;		/* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
+      emit_eobrun(entropy);
+  }
+  /* Store the advanced buffer state back into the destination */
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0-7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * End-of-scan method for the output pass: drain everything still pending.
+ */
+
+METHODDEF(void)
+finish_pass_phuff (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr state = (phuff_entropy_ptr) cinfo->entropy;
+
+  /* Work on private copies of the destination's write position. */
+  state->free_in_buffer = cinfo->dest->free_in_buffer;
+  state->next_output_byte = cinfo->dest->next_output_byte;
+
+  emit_eobrun(state);		/* pending EOB run plus its correction bits */
+  flush_bits(state);		/* then pad out the last partial byte */
+  /* Publish the advanced write position back to the destination. */
+  cinfo->dest->free_in_buffer = state->free_in_buffer;
+  cinfo->dest->next_output_byte = state->next_output_byte;
+}
+
+
+/* Finish up a statistics-gathering pass and create the new Huffman tables:
+ * each table used by this scan is generated exactly once from its counts.
+ */
+
+METHODDEF(void)
+finish_pass_gather_phuff (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did[NUM_HUFF_TBLS];	/* did[t] is TRUE once table t has been generated */
+
+  /* Flush out buffered data (all we care about is counting the EOB symbol) */
+  emit_eobrun(entropy);
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did, SIZEOF(did));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      tbl = compptr->ac_tbl_no;
+    }
+    if (! did[tbl]) {		/* tbl was range-checked by start_pass_phuff */
+      if (is_DC_band)
+        htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
+      else
+        htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
+      if (*htblptr == NULL)	/* allocate the table object on first use */
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
+      did[tbl] = TRUE;
+    }
+  }
+}
+
+
+/*
+ * Create the progressive Huffman encoder object and hook it into cinfo.
+ */
+
+GLOBAL(void)
+jinit_phuff_encoder (j_compress_ptr cinfo)
+{
+  int slot = NUM_HUFF_TBLS;
+  phuff_entropy_ptr enc = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_encoder));
+
+  cinfo->entropy = (struct jpeg_entropy_encoder *) enc;
+  enc->pub.start_pass = start_pass_phuff;
+  /* The correction-bit buffer is allocated lazily by start_pass_phuff. */
+  enc->bit_buffer = NULL;
+
+  /* No derived or statistics tables exist yet. */
+  while (--slot >= 0) {
+    enc->count_ptrs[slot] = NULL;
+    enc->derived_tbls[slot] = NULL;
+  }
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/JPEG/jcprepct.cpp b/JPEG/jcprepct.cpp
new file mode 100644
index 0000000..fa93333
--- /dev/null
+++ b/JPEG/jcprepct.cpp
@@ -0,0 +1,354 @@
+/*
+ * jcprepct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED	/* enables the context fields and pre_process_context */
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object; cinfo->prep points at one of these */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];	/* one row array per component */
+
+  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
+  int next_buf_row;		/* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
+  int this_row_group;		/* starting row index of group to process */
+  int next_buf_stop;		/* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller * my_prep_ptr;	/* cast target for cinfo->prep */
+
+
+/*
+ * Reset the preprocessing controller at the start of a pass.
+ */
+
+METHODDEF(void)
+start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr ctl = (my_prep_ptr) cinfo->prep;
+
+  /* Pass-through is the only buffer mode this controller accepts. */
+  if (! (pass_mode == JBUF_PASS_THRU))
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Nothing is waiting in the conversion buffer yet... */
+  ctl->next_buf_row = 0;
+  /* ...and the whole image height is still to come. */
+  ctl->rows_to_go = cinfo->image_height;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Context-mode bookkeeping.  Non-context mode never looks at these
+   * fields, so they are set without checking which mode is active.
+   */
+  ctl->next_buf_stop = cinfo->max_v_samp_factor * 2; /* two row groups */
+  ctl->this_row_group = 0;	/* processing starts at the first group */
+#endif
+}
+
+
+/*
+ * Pad image_data downward so that it is output_rows high: every row from
+ * input_rows up to output_rows-1 becomes a copy of row input_rows-1,
+ * the last real row.  num_cols samples are copied per row.
+ */
+LOCAL(void)
+expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
+		    int input_rows, int output_rows)
+{
+  int dest;
+
+  for (dest = input_rows; dest < output_rows; ++dest) {
+    /* source and destination are rows of the same sample array */
+    jcopy_sample_rows(image_data, input_rows - 1, image_data, dest, 1, num_cols);
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case, advancing *in_row_ctr
+ * and *out_row_group_ctr by the rows consumed and row groups produced.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group is
+ * v_samp_factor sample rows of each component.  Downsampling will produce
+ * this much data from each max_v_samp_factor input rows.
+ */
+
+METHODDEF(void)
+pre_process_data (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		  JDIMENSION in_rows_avail,
+		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		  JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info * compptr;
+
+  while (*in_row_ctr < in_rows_avail &&
+	 *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;	/* room left in buffer */
+    numrows = (int) MIN((JDIMENSION) numrows, inrows);
+    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+				       prep->color_buf,
+				       (JDIMENSION) prep->next_buf_row,
+				       numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+	prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			   prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf, (JDIMENSION) 0,
+					output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 &&
+	*out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	expand_bottom_edge(output_buf[ci],
+			   compptr->width_in_blocks * DCTSIZE,
+			   (int) (*out_row_group_ctr * compptr->v_samp_factor),
+			   (int) (out_row_groups_avail * compptr->v_samp_factor));
+      }
+      *out_row_group_ctr = out_row_groups_avail;	/* report the buffer as full */
+      break;			/* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case.
+ */
+
+METHODDEF(void)
+pre_process_context (j_compress_ptr cinfo,
+		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,	/* in/out: input rows consumed */
+		     JDIMENSION in_rows_avail,
+		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,	/* in/out: row groups emitted */
+		     JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;		/* rows converted this iteration; component index */
+  int buf_height = cinfo->max_v_samp_factor * 3;	/* three row groups; indices wrap here */
+  JDIMENSION inrows;		/* input rows not yet consumed */
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Color-convert input rows into the buffer, up to next_buf_stop. */
+      inrows = in_rows_avail - *in_row_ctr;
+      numrows = prep->next_buf_stop - prep->next_buf_row;	/* rows wanted before next downsample */
+      numrows = (int) MIN((JDIMENSION) numrows, inrows);	/* ...limited by supplied input */
+      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+					 prep->color_buf,
+					 (JDIMENSION) prep->next_buf_row,
+					 numrows);
+      /* Pad at top of image, if first time through (rows_to_go not yet reduced) */
+      if (prep->rows_to_go == cinfo->image_height) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  int row;
+	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+	    jcopy_sample_rows(prep->color_buf[ci], 0,	/* source: row 0 */
+			      prep->color_buf[ci], -row,	/* dest: row above, via wraparound pointers */
+			      1, cinfo->image_width);
+	  }
+	}
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+	break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			     prep->next_buf_row, prep->next_buf_stop);
+	}
+	prep->next_buf_row = prep->next_buf_stop;	/* treat the padded rows as filled */
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf,
+					(JDIMENSION) prep->this_row_group,	/* first row of group to emit */
+					output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+	prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+	prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;	/* one more group to fill */
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer (j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int group_rows = cinfo->max_v_samp_factor;	/* rows per row group */
+  int comp, r;
+  jpeg_component_info * compptr;
+  JSAMPARRAY real_rows, row_ptrs;
+  JDIMENSION samples_per_row;
+
+  /* A single small allocation holds every component's row-pointer list;
+   * each list spans five row groups (wrap-above, three real, wrap-below). */
+  row_ptrs = (JSAMPARRAY)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(cinfo->num_components * 5 * group_rows) *
+				SIZEOF(JSAMPROW));
+
+  for (comp = 0; comp < cinfo->num_components; comp++) {
+    compptr = cinfo->comp_info + comp;
+    /* Three row groups of real sample storage for this component, made
+     * wide enough that the downsampler may edge-expand horizontally in place.
+     */
+    samples_per_row = (JDIMENSION)
+      (((long) compptr->width_in_blocks * DCTSIZE *
+	cinfo->max_h_samp_factor) / compptr->h_samp_factor);
+    real_rows = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, samples_per_row,
+       (JDIMENSION) (3 * group_rows));
+    /* The real row pointers occupy the middle three groups of the list */
+    for (r = 0; r < 3 * group_rows; r++)
+      row_ptrs[group_rows + r] = real_rows[r];
+    /* Wraparound: the group above aliases the last real group, below the first */
+    for (r = 0; r < group_rows; r++) {
+      row_ptrs[r] = real_rows[2 * group_rows + r];
+      row_ptrs[4 * group_rows + r] = real_rows[r];
+    }
+    prep->color_buf[comp] = row_ptrs + group_rows;	/* index 0 = first real row */
+    row_ptrs += 5 * group_rows;	/* advance to the next component's list */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int comp;
+  jpeg_component_info * compptr;
+  JDIMENSION samples_per_row;
+
+  /* A full-image buffer is not provided by this module; reject the request. */
+  if (need_full_buffer)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_prep_controller));
+  cinfo->prep = (struct jpeg_c_prep_controller *) prep;
+  prep->pub.start_pass = start_pass_prep;
+
+  if (! cinfo->downsample->need_context_rows) {
+    /* No context rows: one row group tall per component, and wide enough
+     * that the downsampler may edge-expand horizontally within the buffer. */
+    prep->pub.pre_process_data = pre_process_data;
+    for (comp = 0; comp < cinfo->num_components; comp++) {
+      compptr = cinfo->comp_info + comp;
+      samples_per_row = (JDIMENSION)
+	(((long) compptr->width_in_blocks * DCTSIZE *
+	  cinfo->max_h_samp_factor) / compptr->h_samp_factor);
+      prep->color_buf[comp] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, samples_per_row,
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+    return;
+  }
+  /* The downsampler asked for context rows: use the wraparound buffer. */
+#ifdef CONTEXT_ROWS_SUPPORTED
+  prep->pub.pre_process_data = pre_process_context;
+  create_context_buffer(cinfo);
+#else
+  ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+}
diff --git a/JPEG/jcsample.cpp b/JPEG/jcsample.cpp
new file mode 100644
index 0000000..212ec87
--- /dev/null
+++ b/JPEG/jcsample.cpp
@@ -0,0 +1,519 @@
+/*
+ * jcsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to a routine that downsamples one row group of a single component */
+typedef JMETHOD(void, downsample1_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+
+/* Private subobject: this module's state, reached by casting cinfo->downsample */
+
+typedef struct {
+  struct jpeg_downsampler pub;	/* public fields */
+
+  /* Downsampling method pointers, one per component (indexed by component number) */
+  downsample1_ptr methods[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler * my_downsample_ptr;	/* pointer form used throughout this file */
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample (j_compress_ptr cinfo)
+{
+  (void) cinfo;			/* nothing to set up per pass */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge (JSAMPARRAY image_data, int num_rows,
+		   JDIMENSION input_cols, JDIMENSION output_cols)
+{
+  int pad_cols = (int) (output_cols - input_cols);
+  int row, col;
+  JSAMPROW pad_start;		/* first sample past the real data of a row */
+  JSAMPLE edge_sample;
+
+  /* Nothing to do unless the rows must grow. */
+  if (pad_cols <= 0)
+    return;
+  for (row = 0; row < num_rows; row++) {
+    pad_start = image_data[row] + input_cols;
+    edge_sample = pad_start[-1];	/* rightmost real sample of this row */
+    for (col = 0; col < pad_cols; col++)
+      pad_start[col] = edge_sample;
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample (j_compress_ptr cinfo,
+		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  jpeg_component_info * compptr = cinfo->comp_info;
+  JSAMPARRAY src_rows, dst_rows;
+  int ci;
+
+  /* Run each component's own method on its slice of the two buffers. */
+  for (ci = 0; ci < cinfo->num_components; ci++, compptr++) {
+    src_rows = input_buf[ci] + in_row_index;
+    dst_rows = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
+    (*downsample->methods[ci]) (cinfo, compptr, src_rows, dst_rows);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Integral shrink ratios of this component relative to the largest
+   * sampling factors, and the size of the box averaged per output sample. */
+  int h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
+  int v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
+  int box_area = h_expand * v_expand;	/* source samples per output sample */
+  int half_area = box_area / 2;		/* rounding term for the average */
+
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  JDIMENSION outcol, src_col;
+  int outrow, src_row, dx, dy;
+  JSAMPROW src, dst;
+  INT32 sum;
+
+  /* Pad the source rows on the right so that every output sample, including
+   * the padding columns, can be produced by the same box-average loop.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * h_expand);
+
+  /* Average each h_expand x v_expand box of source samples. */
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    src_row = outrow * v_expand;	/* first source row of this row of boxes */
+    dst = output_data[outrow];
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      src_col = outcol * h_expand;	/* first source column of this box */
+      sum = 0;
+      for (dy = 0; dy < v_expand; dy++) {
+	src = input_data[src_row + dy] + src_col;
+	for (dx = 0; dx < h_expand; dx++)
+	  sum += (INT32) GETJSAMPLE(src[dx]);
+      }
+      dst[outcol] = (JSAMPLE) ((sum + half_area) / box_area);
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int group_rows = cinfo->max_v_samp_factor;
+  JDIMENSION padded_cols = compptr->width_in_blocks * DCTSIZE;
+
+  /* Straight copy of the row group, then pad each row to a block multiple */
+  jcopy_sample_rows(input_data, 0, output_data, 0, group_rows, cinfo->image_width);
+  expand_right_edge(output_data, group_rows, cinfo->image_width, padded_cols);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  JDIMENSION outcol;
+  JSAMPROW src, dst;
+  int outrow, pair_sum, bias;
+
+  /* Pad the source rows on the right so that the padding columns of the
+   * output can be produced by the same loop as the real ones.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  /* Each output sample averages two horizontally adjacent source samples.
+   * The rounding bias alternates 0,1,0,1,... along a row so that halves
+   * are not always rounded in the same direction.
+   */
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    src = input_data[outrow];
+    dst = output_data[outrow];
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      bias = (int) (outcol & 1);
+      pair_sum = GETJSAMPLE(src[2 * outcol]) + GETJSAMPLE(src[2 * outcol + 1]);
+      dst[outcol] = (JSAMPLE) ((pair_sum + bias) >> 1);
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  JDIMENSION outcol, col;
+  JSAMPROW upper, lower, dst;
+  int outrow;
+  int quad_sum, bias;
+
+  /* Pad the source rows on the right so that the padding columns of the
+   * output can be produced by the same loop as the real ones.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  /* Output row k averages 2x2 cells taken from source rows 2k and 2k+1.
+   * The rounding bias alternates 1,2,1,2,... along a row so that fractions
+   * are not always rounded in the same direction.
+   */
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    upper = input_data[2 * outrow];
+    lower = input_data[2 * outrow + 1];
+    dst = output_data[outrow];
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      col = 2 * outcol;		/* left column of the 2x2 source cell */
+      bias = 1 + (int) (outcol & 1);
+      quad_sum = GETJSAMPLE(upper[col]) + GETJSAMPLE(upper[col + 1]) +
+		 GETJSAMPLE(lower[col]) + GETJSAMPLE(lower[col + 1]);
+      dst[outcol] = (JSAMPLE) ((quad_sum + bias) >> 2);
+    }
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+			JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.  The call starts at row -1 and covers max_v_samp_factor+2
+   * rows, so the context rows above and below are padded as well. */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];		/* upper row of the 2x2 cells */
+    inptr1 = input_data[inrow+1];	/* lower row of the 2x2 cells */
+    above_ptr = input_data[inrow-1];	/* context row above (index -1 on first pass) */
+    below_ptr = input_data[inrow+2];	/* context row below */
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    neighsum += neighsum;	/* edge-neighbors count double */
+    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);	/* round and descale by 2^16 */
+    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {	/* all but first and last output */
+      /* sum of pixels directly mapped to this output element */
+      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      /* sum of edge-neighbor pixels */
+      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+    }
+
+    /* Special case for last column: the column past the end is taken as the last one */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    neighsum += neighsum;	/* edge-neighbors count double */
+    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+    inrow += 2;		/* each output row consumes two input rows */
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
+			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;	/* above+center+below sums: current, previous, next column */
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.  The call starts at row -1 and covers max_v_samp_factor+2
+   * rows, so the context rows above and below are padded as well. */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    above_ptr = input_data[outrow-1];	/* context row above (index -1 on first pass) */
+    below_ptr = input_data[outrow+1];	/* context row below */
+
+    /* Special case for first column: column 0 also stands in for column -1 */
+    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+	     GETJSAMPLE(*inptr);
+    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		 GETJSAMPLE(*inptr);
+    neighsum = colsum + (colsum - membersum) + nextcolsum;	/* colsum used as the left column too */
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);	/* round and descale by 2^16 */
+    lastcolsum = colsum; colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {	/* all but first and last column */
+      membersum = GETJSAMPLE(*inptr++);
+      above_ptr++; below_ptr++;
+      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		   GETJSAMPLE(*inptr);
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;	/* 8 neighbors = 3 columns minus center */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      lastcolsum = colsum; colsum = nextcolsum;
+    }
+
+    /* Special case for last column: the current column also stands in for the next one */
+    membersum = GETJSAMPLE(*inptr);
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+jinit_downsampler (j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  jpeg_component_info * compptr;
+  boolean all_smoothable = TRUE;	/* cleared when a chosen method cannot smooth */
+  int ci;
+
+  /* Create the module object and install its public entry points. */
+  downsample = (my_downsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_downsampler));
+  cinfo->downsample = (struct jpeg_downsampler *) downsample;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub.downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Choose a method per component; the smoothing variants need context rows */
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    compptr = cinfo->comp_info + ci;
+    if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
+	compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+      downsample->methods[ci] = fullsize_downsample;
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = fullsize_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      }
+#endif
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+      downsample->methods[ci] = h2v1_downsample;
+      all_smoothable = FALSE;	/* no smoothing variant for 2h1v */
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+      downsample->methods[ci] = h2v2_downsample;
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = h2v2_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      }
+#endif
+    } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
+	       (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+      downsample->methods[ci] = int_downsample;
+      all_smoothable = FALSE;	/* no smoothing variant for general ratios */
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && ! all_smoothable)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
diff --git a/JPEG/jctrans.cpp b/JPEG/jctrans.cpp
new file mode 100644
index 0000000..0e6d707
--- /dev/null
+++ b/JPEG/jctrans.cpp
@@ -0,0 +1,388 @@
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations of the two LOCAL routines defined later in this file */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at the
+ * time jpeg_write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)	/* must still be in start state */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules; this also writes the file header */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;	/* coefficient-writing state */
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;	/* address of the destination's table slot */
+  jpeg_component_info *incomp, *outcomp;	/* walk src / dst comp_info in step */
+  JQUANT_TBL *c_quant, *slot_quant;	/* component's saved table vs. slot's table */
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   */
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {	/* only slots the source has */
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)	/* destination slot not allocated yet */
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||	/* range-check before indexing */
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {	/* compare only when the component has a saved table */
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's Huffman table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {	/* any other major version is not copied */
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;	/* density is copied regardless of version */
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* input_components is not really used when transcoding, but
+   * jcmaster.c's initial_setup will complain if it is 0, so fake it.
+   */
+  cinfo->input_components = 1;
+  /* Master control comes first (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Select the entropy encoder.  Arithmetic coding is rejected here,
+   * which leaves the sequential or progressive Huffman encoder.
+   */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else if (! cinfo->progressive_mode) {
+    jinit_huff_encoder(cinfo);
+  } else {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    jinit_phuff_encoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
+
+  /* Special coefficient buffer controller fed from the caller's arrays */
+  transencode_coef_controller(cinfo, coef_arrays);
+  /* Marker writer module */
+  jinit_marker_writer(cinfo);
+  /* Every module is set up: let the memory manager allocate virtual arrays */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Emit the datastream header (SOI, JFIF) right away; frame and scan
+   * headers are postponed till later, so that the application can
+   * insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object; installed as cinfo->coef by transencode_coef_controller */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields; first, since cinfo->coef is cast to my_coef_ptr */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component (the caller-supplied coef_arrays pointer). */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges; one pre-zeroed JBLOCK each. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Set up the within-iMCU-row counters before a new row is processed */
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  int rows_needed = 1;	/* interleaved scan: MCU row == iMCU row */
+
+  /* A noninterleaved scan has v_samp_factor MCU rows in each iMCU row,
+   * except for the last iMCU row of the image, where only the
+   * component's last_row_height rows are left to process.
+   */
+  if (cinfo->comps_in_scan <= 1) {
+    jpeg_component_info *only_comp = cinfo->cur_comp_info[0];
+    if (ctl->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      rows_needed = only_comp->v_samp_factor;
+    else
+      rows_needed = only_comp->last_row_height;
+  }
+  ctl->MCU_rows_per_iMCU_row = rows_needed;
+  ctl->mcu_ctr = 0;
+  ctl->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  /* JBUF_CRANK_DEST is the only buffer mode this controller accepts */
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Begin at the first iMCU row and reset the within-row counters */
+  ((my_coef_ptr) cinfo->coef)->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;	/* rightmost MCU column */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* bottom iMCU row */
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;		/* first block column of this MCU in the component */
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];	/* per-component rows for this iMCU row */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];	/* block pointers handed to encode_mcu */
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row; starts at the saved counters after a suspension */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yindex+yoffset < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	    for (xindex = 0; xindex < blockcnt; xindex++)
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = 0;
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  for (; xindex < compptr->MCU_width; xindex++) {
+	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];	/* NOTE(review): needs blkn > 0 here -- confirm */
+	    blkn++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;	/* the loops above resume from these */
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr ctl;
+  JBLOCKROW blank_blocks;
+  int blk;
+
+  /* Create the controller object and hook up its method pointers */
+  ctl = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  ctl->pub.start_pass = start_pass_coef;
+  ctl->pub.compress_data = compress_output;
+  cinfo->coef = (struct jpeg_c_coef_controller *) ctl;
+
+  /* Remember the caller's virtual arrays (only the pointer is stored) */
+  ctl->whole_image = coef_arrays;
+
+  /* One zero-filled JBLOCK per MCU block position serves as dummy padding */
+  blank_blocks = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  jzero_far((void FAR *) blank_blocks, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  for (blk = 0; blk < C_MAX_BLOCKS_IN_MCU; blk++)
+    ctl->dummy_buffer[blk] = blank_blocks + blk;
+}
diff --git a/JPEG/jdapimin.cpp b/JPEG/jdapimin.cpp
new file mode 100644
index 0000000..2e7fd6c
--- /dev/null
+++ b/JPEG/jdapimin.cpp
@@ -0,0 +1,394 @@
+/*
+* jdapimin.c
+*
+* Copyright (C) 1994-1998, Thomas G. Lane.
+* This file is part of the Independent JPEG Group's software.
+* For conditions of distribution and use, see the accompanying README file.
+*
+* This file contains application interface code for the decompression half
+* of the JPEG library.  These are the "minimum" API routines that may be
+* needed in either the normal full-decompression case or the
+* transcoding-only case.
+*
+* Most of the routines intended to be called directly by an application
+* are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+* shared by compression and decompression, and jdtrans.c for the transcoding
+* case.
+*/
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+* Initialization of a JPEG decompression object.
+* The error manager must already be set up (in case memory manager fails).
+*/
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+	int i;
+	
+	/* Guard against version mismatches between library and caller. */
+	cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+	if (version != JPEG_LIB_VERSION)
+		ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+	if (structsize != SIZEOF(struct jpeg_decompress_struct))
+		ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+		(int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+	
+		/* (Not part of the if above.)  For debugging purposes, we zero the
+		* whole master structure.  But the application has already set the
+		* err pointer, and may have set client_data, so we have to save and
+		* restore those fields.  Note: if application hasn't set client_data,
+		* tools like Purify may complain here.
+		*/
+	{
+		struct jpeg_error_mgr * err = cinfo->err;
+		void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+		MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+		cinfo->err = err;
+		cinfo->client_data = client_data;
+	}
+	cinfo->is_decompressor = TRUE;	/* set again here because MEMZERO cleared the struct */
+	
+	/* Initialize a memory manager instance for this object */
+	jinit_memory_mgr((j_common_ptr) cinfo);
+	
+	/* Zero out pointers to permanent structures (explicit NULLs, in addition to the MEMZERO above). */
+	cinfo->progress = NULL;
+	cinfo->src = NULL;
+	
+	for (i = 0; i < NUM_QUANT_TBLS; i++)
+		cinfo->quant_tbl_ptrs[i] = NULL;
+	
+	for (i = 0; i < NUM_HUFF_TBLS; i++) {
+		cinfo->dc_huff_tbl_ptrs[i] = NULL;
+		cinfo->ac_huff_tbl_ptrs[i] = NULL;
+	}
+	
+	/* Initialize marker processor so application can override methods
+	* for COM, APPn markers before calling jpeg_read_header.
+	*/
+	cinfo->marker_list = NULL;
+	jinit_marker_reader(cinfo);
+	
+	/* And initialize the overall input controller. */
+	jinit_input_controller(cinfo);
+	
+	/* OK, I'm ready: jpeg_read_header and jpeg_consume_input accept this state */
+	cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+* Destruction of a JPEG decompression object (thin wrapper around jpeg_destroy).
+*/
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+	jpeg_destroy((j_common_ptr) cinfo); /* use common routine; cinfo is passed as the generic object type */
+}
+
+
+/*
+* Abort processing of a JPEG decompression operation, but don't destroy
+* the object itself (thin wrapper around the common jpeg_abort routine).
+*/
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+	jpeg_abort((j_common_ptr) cinfo); /* use common routine; cinfo is passed as the generic object type */
+}
+
+
+/*
+* Set default decompression parameters.
+*/
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+	/* Guess the input colorspace, and set output colorspace accordingly. */
+	/* (Wish JPEG committee had provided a real way to specify this...) */
+	/* Note application may override our guesses. */
+	switch (cinfo->num_components) {
+	case 1:
+		cinfo->jpeg_color_space = JCS_GRAYSCALE;
+		cinfo->out_color_space = JCS_GRAYSCALE;
+		break;
+		
+	case 3:
+		if (cinfo->saw_JFIF_marker) {
+			cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
+		} else if (cinfo->saw_Adobe_marker) {
+			switch (cinfo->Adobe_transform) {
+			case 0:
+				cinfo->jpeg_color_space = JCS_RGB;
+				break;
+			case 1:
+				cinfo->jpeg_color_space = JCS_YCbCr;
+				break;
+			default:
+				WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+				cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+				break;
+			}
+		} else {
+			/* Saw no special markers, try to guess from the component IDs */
+			int cid0 = cinfo->comp_info[0].component_id;
+			int cid1 = cinfo->comp_info[1].component_id;
+			int cid2 = cinfo->comp_info[2].component_id;
+			
+			if (cid0 == 1 && cid1 == 2 && cid2 == 3)
+				cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+			else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+				cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+			else {
+				TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+				cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+			}
+		}
+		/* Always guess RGB is proper output colorspace. */
+		cinfo->out_color_space = JCS_RGB;
+		break;
+		
+			case 4:	/* despite the indentation, a label of the outer num_components switch */
+				if (cinfo->saw_Adobe_marker) {
+					switch (cinfo->Adobe_transform) {
+					case 0:
+						cinfo->jpeg_color_space = JCS_CMYK;
+						break;
+					case 2:
+						cinfo->jpeg_color_space = JCS_YCCK;
+						break;
+					default:
+						WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+						cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+						break;
+					}
+				} else {
+					/* No special markers, assume straight CMYK. */
+					cinfo->jpeg_color_space = JCS_CMYK;
+				}
+				cinfo->out_color_space = JCS_CMYK;	/* output is CMYK for either input guess */
+				break;
+				
+					default:	/* also belongs to the outer switch: any other component count */
+						cinfo->jpeg_color_space = JCS_UNKNOWN;
+						cinfo->out_color_space = JCS_UNKNOWN;
+						break;
+	}
+	
+	/* Set defaults for other decompression parameters. */
+	cinfo->scale_num = 1;		/* 1:1 scaling */
+	cinfo->scale_denom = 1;
+	cinfo->output_gamma = 1.0;
+	cinfo->buffered_image = FALSE;
+	cinfo->raw_data_out = FALSE;
+	cinfo->dct_method = JDCT_DEFAULT;
+	cinfo->do_fancy_upsampling = TRUE;
+	cinfo->do_block_smoothing = TRUE;
+	cinfo->quantize_colors = FALSE;
+	/* We set these in case application only sets quantize_colors. */
+	cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+	cinfo->two_pass_quantize = TRUE;
+#else
+	cinfo->two_pass_quantize = FALSE;
+#endif
+	cinfo->desired_number_of_colors = 256;
+	cinfo->colormap = NULL;
+	/* Initialize for no mode change in buffered-image mode. */
+	cinfo->enable_1pass_quant = FALSE;
+	cinfo->enable_external_quant = FALSE;
+	cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+* Decompression startup: read start of JPEG datastream to see what's there.
+* Need only initialize JPEG object and supply a data source before calling.
+*
+* This routine will read as far as the first SOS marker (ie, actual start of
+* compressed data), and will save all tables and parameters in the JPEG
+* object.  It will also initialize the decompression parameters to default
+* values, and finally return JPEG_HEADER_OK.  On return, the application may
+* adjust the decompression parameters and then call jpeg_start_decompress.
+* (Or, if the application only wanted to determine the image parameters,
+* the data need not be decompressed.  In that case, call jpeg_abort or
+* jpeg_destroy to release any temporary space.)
+* If an abbreviated (tables only) datastream is presented, the routine will
+* return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+* re-use the JPEG object to read the abbreviated image datastream(s).
+* It is unnecessary (but OK) to call jpeg_abort in this case.
+* The JPEG_SUSPENDED return code only occurs if the data source module
+* requests suspension of the decompressor.  In this case the application
+* should load more source data and then re-call jpeg_read_header to resume
+* processing.
+* If a non-suspending data source is used and require_image is TRUE, then the
+* return code need not be inspected since only JPEG_HEADER_OK is possible.
+*
+* This routine is now just a front end to jpeg_consume_input, with some
+* extra error checking.
+*/
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+	int status;
+	
+	/* The object must be in the START or INHEADER state */
+	if (! (cinfo->global_state == DSTATE_START ||
+		cinfo->global_state == DSTATE_INHEADER))
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	
+	/* The actual reading is delegated to jpeg_consume_input */
+	status = jpeg_consume_input(cinfo);
+	
+	if (status == JPEG_REACHED_SOS)
+		return JPEG_HEADER_OK;
+	
+	if (status == JPEG_REACHED_EOI) {
+		/* Reached EOI: complain if the application wanted an image */
+		if (require_image)
+			ERREXIT(cinfo, JERR_NO_IMAGE);
+		/* Reset to start state; it would be safer to require the
+		* application to call jpeg_abort, but we can't change it now for
+		* compatibility reasons.  A side effect is to free any temporary
+		* memory (there shouldn't be any).
+		*/
+		jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+		return JPEG_HEADER_TABLES_ONLY;
+	}
+	
+	/* JPEG_SUSPENDED (or any other code) is handed back unchanged */
+	return status;
+}
+
+
+/*
+* Consume data in advance of what the decompressor requires.
+* This can be called at any time once the decompressor object has
+* been created and a data source has been set up.
+*
+* This routine is essentially a state machine that handles a couple
+* of critical state-transition actions, namely initial setup and
+* transition from header scanning to ready-for-start_decompress.
+* All the actual input is done via the input controller's consume_input
+* method.
+*/
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+	int retcode = JPEG_SUSPENDED;	/* every case but the default (bad state) one overwrites this */
+	
+	/* NB: every possible DSTATE value should be listed in this switch */
+	switch (cinfo->global_state) {
+	case DSTATE_START:
+		/* Start-of-datastream actions: reset appropriate modules */
+		(*cinfo->inputctl->reset_input_controller) (cinfo);
+		/* Initialize application's data source module */
+		(*cinfo->src->init_source) (cinfo);
+		cinfo->global_state = DSTATE_INHEADER;
+		/*FALLTHROUGH*/
+	case DSTATE_INHEADER:
+		retcode = (*cinfo->inputctl->consume_input) (cinfo);
+		if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+			/* Set up default parameters based on header data */
+			default_decompress_parms(cinfo);
+			/* Set global state: ready for start_decompress */
+			cinfo->global_state = DSTATE_READY;
+		}
+		break;
+	case DSTATE_READY:
+		/* Can't advance past first SOS until start_decompress is called */
+		retcode = JPEG_REACHED_SOS;	/* no input is consumed in this state */
+		break;
+	case DSTATE_PRELOAD:	/* all of the following states just pass the call through */
+	case DSTATE_PRESCAN:
+	case DSTATE_SCANNING:
+	case DSTATE_RAW_OK:
+	case DSTATE_BUFIMAGE:
+	case DSTATE_BUFPOST:
+	case DSTATE_STOPPING:
+		retcode = (*cinfo->inputctl->consume_input) (cinfo);
+		break;
+	default:
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	}
+	return retcode;
+}
+
+
+/*
+* Have we finished reading the input file?
+*/
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+	/* Accept any state from DSTATE_START through DSTATE_STOPPING */
+	if (! (cinfo->global_state >= DSTATE_START &&
+		cinfo->global_state <= DSTATE_STOPPING))
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+* Is there more than one scan?
+*/
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+	/* Meaningful only once jpeg_read_header is done: READY .. STOPPING */
+	if (! (cinfo->global_state >= DSTATE_READY &&
+		cinfo->global_state <= DSTATE_STOPPING))
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+* Finish JPEG decompression.
+*
+* This will normally just verify the file trailer and release temp storage.
+*
+* Returns FALSE if suspended.  The return value need be inspected only if
+* a suspending data source is used.
+*/
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo){
+	if ((cinfo->global_state == DSTATE_SCANNING ||
+		cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
+		/* Terminate final pass of non-buffered mode */
+		if (cinfo->output_scanline < cinfo->output_height)	/* not every row was output yet */
+			ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+		(*cinfo->master->finish_output_pass) (cinfo);
+		cinfo->global_state = DSTATE_STOPPING;
+	} else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+		/* Finishing after a buffered-image operation */
+		cinfo->global_state = DSTATE_STOPPING;
+	} else if (cinfo->global_state != DSTATE_STOPPING) {
+		/* STOPPING = repeat call after a suspension, anything else is error */
+		ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+	}
+	/* Read until EOI; the state is already STOPPING, so a repeat call resumes here */
+	while (! cinfo->inputctl->eoi_reached) {
+		if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+			return FALSE;		/* Suspend, come back later */
+	}
+	/* Do final cleanup: let the data source module terminate */
+	(*cinfo->src->term_source) (cinfo);
+	/* We can use jpeg_abort to release memory and reset global_state */
+	jpeg_abort((j_common_ptr) cinfo);
+	return TRUE;
+}
diff --git a/JPEG/jdapistd.cpp b/JPEG/jdapistd.cpp
new file mode 100644
index 0000000..8ed0d4d
--- /dev/null
+++ b/JPEG/jdapistd.cpp
@@ -0,0 +1,277 @@
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declaration: output_pass_setup is defined after its first caller */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+// WARNING: Misha changed here: return type is unsigned char, replacing the commented-out line below.
+//GLOBAL(boolean)  -- NOTE(review): the prototype in the header must match this change; confirm.
+GLOBAL(unsigned char)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* Buffered-image mode stops here; jpeg_start_output is expected next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)
+	  return FALSE;	/* state is still DSTATE_PRELOAD, so a repeat call re-enters this loop */
+	if (retcode == JPEG_REACHED_EOI)
+	  break;	/* every scan has been absorbed */
+	/* Advance progress counter once per completed row or newly started scan */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)	/* PRESCAN = resuming inside output_pass_setup */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call (not a resumption): do pass setup and mark us as PRESCAN */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass: rows are processed but no output is kept */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data; a NULL buffer with zero capacity is passed */
+      last_scanline = cinfo->output_scanline;
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend (state stays PRESCAN) */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines (DSTATE_SCANNING) or jpeg_read_raw_data (DSTATE_RAW_OK).
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION lines_done = 0;	/* filled in by process_data */
+
+  /* Only legal in DSTATE_SCANNING */
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Asking for rows past the bottom of the image: warn and return nothing */
+  if (! (cinfo->output_scanline < cinfo->output_height)) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Publish the current position to the optional progress monitor */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  (*cinfo->main->process_data) (cinfo, scanlines, &lines_done, max_lines);
+  cinfo->output_scanline += lines_done;
+  return lines_done;
+}
+
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION imcu_rows;		/* sample rows making up one iMCU row */
+
+  /* Only legal in DSTATE_RAW_OK */
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Asking for rows past the bottom of the image: warn and return nothing */
+  if (! (cinfo->output_scanline < cinfo->output_height)) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Publish the current position to the optional progress monitor */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* The caller's buffer has to hold one complete iMCU row */
+  imcu_rows = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
+  if (imcu_rows > max_lines)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  /* Decode straight into the caller's planes; a zero result means suspension */
+  if ((*cinfo->coef->decompress_data) (cinfo, data)) {
+    cinfo->output_scanline += imcu_rows;
+    return imcu_rows;
+  }
+  return 0;
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  int target_scan = (scan_number <= 0) ? 1 : scan_number;  /* floor at 1 */
+
+  /* Callable from BUFIMAGE, or from PRESCAN when resuming a suspension */
+  if (! (cinfo->global_state == DSTATE_BUFIMAGE ||
+	 cinfo->global_state == DSTATE_PRESCAN))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Once EOI has been seen, cap the request at the last scan that was read */
+  if (cinfo->inputctl->eoi_reached && target_scan > cinfo->input_scan_number)
+    target_scan = cinfo->input_scan_number;
+  cinfo->output_scan_number = target_scan;
+  /* Run any dummy passes and arm the real one */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  /* Nonzero while a buffered-image output pass is still open */
+  int pass_open = cinfo->buffered_image &&
+    (cinfo->global_state == DSTATE_SCANNING || cinfo->global_state == DSTATE_RAW_OK);
+  if (pass_open) {
+    /* Close the pass; it need not have been read to the end */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* Only a resumed call (already BUFPOST) is tolerated here */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Consume input until a scan beyond the displayed one starts, or EOI shows up */
+  while (! cinfo->inputctl->eoi_reached && cinfo->input_scan_number <= cinfo->output_scan_number) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* source suspended; call again later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/JPEG/jdatadst.cpp b/JPEG/jdatadst.cpp
new file mode 100644
index 0000000..8644afd
--- /dev/null
+++ b/JPEG/jdatadst.cpp
@@ -0,0 +1,156 @@
+/*
+ * jdatadst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Private destination manager for stdio output; cinfo->dest is cast to this type */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields (next_output_byte, free_in_buffer, methods) */
+
+  FILE * outfile;		/* target stream, stored by jpeg_stdio_dest */
+  JOCTET * buffer;		/* start of buffer, allocated in init_destination */
+} my_destination_mgr;
+
+typedef my_destination_mgr * my_dest_ptr;
+
+#if 1
+#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+#else
+// Misha modified for bigger block I/O (kept disabled)
+#define OUTPUT_BUF_SIZE  (1<<16)	/* parenthesized: the macro is used inside - and * expressions */
+#endif
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr sink = (my_dest_ptr) cinfo->dest;
+  /* The output buffer comes from the JPOOL_IMAGE pool */
+  JOCTET * block = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+  /* Start with the whole buffer empty and available */
+  sink->buffer = block;
+  sink->pub.next_output_byte = block;
+  sink->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer (j_compress_ptr cinfo)
+{
+  my_dest_ptr sink = (my_dest_ptr) cinfo->dest;
+  /* Always write the full buffer, whatever the pointer/count fields say */
+  size_t written = JFWRITE(sink->outfile, sink->buffer, OUTPUT_BUF_SIZE);
+
+  if (written != (size_t) OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);	/* short write */
+  /* Rewind to an empty buffer; this manager always reports success */
+  sink->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+  sink->pub.next_output_byte = sink->buffer;
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr sink = (my_dest_ptr) cinfo->dest;
+  FILE * out = sink->outfile;
+  /* Bytes sitting in the buffer that have not been written to the stream yet */
+  size_t pending = OUTPUT_BUF_SIZE - sink->pub.free_in_buffer;
+
+  /* Emit the partial final buffer, if there is one */
+  if (pending != 0 && JFWRITE(out, sink->buffer, pending) != pending)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+  fflush(out);
+  /* An error flagged on the stream means the output file is not OK */
+  if (ferror(out))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
+{
+  my_dest_ptr sink;
+
+  /* Create the manager in the permanent pool the first time this JPEG
+   * object gets a stdio destination; later calls reuse it, so several
+   * images can be written to one file without repeating this call.
+   * Caution: for the same reason, do not alternate this manager with one
+   * whose private struct has a different size on the same JPEG object.
+   */
+  if (! cinfo->dest)
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_destination_mgr));
+
+  /* (Re)install the method table and remember the target stream */
+  sink = (my_dest_ptr) cinfo->dest;
+  sink->outfile = outfile;
+  sink->pub.term_destination = term_destination;
+  sink->pub.empty_output_buffer = empty_output_buffer;
+  sink->pub.init_destination = init_destination;
+}
diff --git a/JPEG/jdatasrc.cpp b/JPEG/jdatasrc.cpp
new file mode 100644
index 0000000..0b0e978
--- /dev/null
+++ b/JPEG/jdatasrc.cpp
@@ -0,0 +1,215 @@
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields (next_input_byte, bytes_in_buffer, methods) */
+
+  FILE * infile;	     	/* source stream, stored by jpeg_stdio_src */
+  JOCTET * buffer;		    /* start of buffer, allocated once in jpeg_stdio_src */
+  boolean start_of_file;	/* TRUE from init_source until fill_input_buffer first returns */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+#if 1
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+#else	/* Misha modified for bigger block I/O (kept disabled; must not redefine the macro above) */
+#define INPUT_BUF_SIZE  (1<<16)	/* parenthesized: the macro is used inside * expressions */
+#endif
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  /* Flag the stream as "nothing delivered yet"; fill_input_buffer reads this
+   * to tell an empty input file from one that merely ends early.  The
+   * buffer itself is left untouched on purpose: when several images are
+   * read back to back from one source, its leftover bytes may belong to
+   * the next image.
+   */
+  ((my_src_ptr) cinfo->src)->start_of_file = TRUE;
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend decompression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr reader = (my_src_ptr) cinfo->src;
+  JOCTET * block = reader->buffer;
+  size_t got = JFREAD(reader->infile, block, INPUT_BUF_SIZE);
+
+  if (got == 0) {		/* size_t is unsigned, so zero is the only "no data" result */
+    /* Nothing read at all: an empty file is fatal, a short one only warns */
+    if (reader->start_of_file)
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Synthesize an EOI marker so the decoder can finish with what it has */
+    block[0] = (JOCTET) 0xFF;
+    block[1] = (JOCTET) JPEG_EOI;
+    got = 2;
+  }
+
+  /* Hand the refreshed buffer to the library */
+  reader->start_of_file = FALSE;
+  reader->pub.bytes_in_buffer = got;
+  reader->pub.next_input_byte = block;
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  my_src_ptr reader = (my_src_ptr) cinfo->src;
+  long remaining = num_bytes;	/* bytes still to be discarded */
+  /* Zero or negative requests are ignored */
+  if (remaining <= 0)
+    return;
+  /* Discard whole buffers by simply reading on; fseek() is avoided because
+   * it does not work on pipes, and large skips are infrequent anyway.
+   * fill_input_buffer in this module always returns TRUE, so there is no
+   * suspension case to handle inside the loop.
+   */
+  while (remaining > (long) reader->pub.bytes_in_buffer) {
+    remaining -= (long) reader->pub.bytes_in_buffer;
+    (void) fill_input_buffer(cinfo);
+  }
+  /* What is left lies inside the current buffer: just step over it */
+  reader->pub.next_input_byte += (size_t) remaining;
+  reader->pub.bytes_in_buffer -= (size_t) remaining;
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  (void) cinfo;	/* deliberately a no-op: this manager has nothing to do at end of input */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr reader = (my_src_ptr) cinfo->src;
+
+  /* On the first call for this JPEG object, create both the manager and its
+   * read buffer in the permanent pool.  Keeping them across images lets a
+   * sequence of JPEGs be read from one file after a single call here
+   * (throwing the buffer away between images could drop the start of the
+   * next one).  The flip side: do not alternate this manager with a
+   * different source manager on the same JPEG object.
+   */
+  if (reader == NULL) {
+    reader = (my_src_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_source_mgr));
+    cinfo->src = (struct jpeg_source_mgr *) reader;
+    reader->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+  }
+
+  /* Install the stdio methods; resync uses the library's default routine */
+  reader->pub.init_source = init_source;
+  reader->pub.fill_input_buffer = fill_input_buffer;
+  reader->pub.skip_input_data = skip_input_data;
+  reader->pub.resync_to_restart = jpeg_resync_to_restart;
+  reader->pub.term_source = term_source;
+  reader->infile = infile;
+  reader->pub.bytes_in_buffer = 0;	/* empty buffer forces fill_input_buffer on first read */
+  reader->pub.next_input_byte = NULL;	/* no valid position until the buffer is loaded */
+}
diff --git a/JPEG/jdcoefct.cpp b/JPEG/jdcoefct.cpp
new file mode 100644
index 0000000..4938d20
--- /dev/null
+++ b/JPEG/jdcoefct.cpp
@@ -0,0 +1,736 @@
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+/* Private buffer controller object; cinfo->coef is cast to this type */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* next MCU column in current row; saved on suspension */
+  int MCU_vert_offset;		/* current MCU row within iMCU row; saved on suspension */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed; set by start_iMCU_row */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * (On 80x86, the workspace is FAR even though it's not really very big;
+   * this is to keep the module interfaces unchanged when a large coefficient
+   * buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch;
+#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+/* Forward declarations; the multiscan and smoothing routines exist only when their feature macros are defined */
+METHODDEF(int) decompress_onepass
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+METHODDEF(int) decompress_smooth_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Input side: prepare the per-row counters before reading a new iMCU row */
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  int mcu_rows;			/* MCU rows contained in this iMCU row */
+
+  /* Interleaved scan: exactly one MCU row per iMCU row.
+   * Single-component scan: v_samp_factor MCU rows, except that the last
+   * iMCU row of the image only has last_row_height of them.
+   */
+  if (cinfo->comps_in_scan > 1)
+    mcu_rows = 1;
+  else if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+    mcu_rows = cinfo->cur_comp_info[0]->v_samp_factor;
+  else
+    mcu_rows = cinfo->cur_comp_info[0]->last_row_height;
+
+  ctl->MCU_rows_per_iMCU_row = mcu_rows;
+  ctl->MCU_vert_offset = 0;
+  ctl->MCU_ctr = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;	/* input side restarts at the first iMCU row */
+  start_iMCU_row(cinfo);	/* must come second: it reads input_iMCU_row */
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+
+  /* Only a multipass setup (coef_arrays present) chooses between the plain
+   * and the smoothing output routine; the choice is redone on every pass.
+   */
+  if (ctl->pub.coef_arrays != NULL)
+    ctl->pub.decompress_data =
+      (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+	? decompress_smooth_data : decompress_data;
+#endif
+  cinfo->output_iMCU_row = 0;	/* output side restarts at the first iMCU row */
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;	/* index of final MCU column */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* index of final iMCU row */
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row; both loops start from the
+   * counters saved in coef, so a call after a suspension picks up mid-row. */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      jzero_far((void FAR *) coef->MCU_buffer[0],
+		(size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; record where to resume and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkn gets
+       * incremented past them!).  Note the inner loop relies on having
+       * allocated the MCU_buffer[] blocks sequentially.
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component; still skip its blocks. */
+	if (! compptr->component_needed) {
+	  blkn += compptr->MCU_blocks;
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_scaled_size;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  /* On the last iMCU row, block rows at or past last_row_height are skipped */
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr,
+			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+			      output_ptr, output_col);
+	      output_col += compptr->DCT_scaled_size;
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  output_ptr += compptr->DCT_scaled_size;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance both input and output counters */
+  cinfo->output_iMCU_row++;
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done; cinfo is not touched */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;		/* first block column of this MCU in a component */
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];	/* block rows per scan component */
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Process the iMCU row, starting from the counters saved on suspension */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;
+	  }
+	}
+      }
+      /* Decode the MCU directly into the blocks listed in MCU_buffer. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Decoder could not finish; save this MCU position and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;		/* next MCU row starts at column 0 */
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION final_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blk;		/* block column within the component */
+  JDIMENSION out_col;		/* sample column handed to the IDCT */
+  int comp, row, rows_to_do;
+  JBLOCKARRAY coef_rows;
+  JBLOCKROW cur_block;
+  JSAMPARRAY out_rows;
+  jpeg_component_info *comp_info;
+  inverse_DCT_method_ptr idct;
+
+  /* Keep pulling input until it has moved past the iMCU row to be emitted. */
+  for (;;) {
+    if (cinfo->input_scan_number > cinfo->output_scan_number)
+      break;
+    if (cinfo->input_scan_number == cinfo->output_scan_number &&
+	cinfo->input_iMCU_row > cinfo->output_iMCU_row)
+      break;
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* Run the IDCT over this iMCU row of every component that is wanted. */
+  for (comp = 0; comp < cinfo->num_components; comp++) {
+    comp_info = cinfo->comp_info + comp;
+    if (! comp_info->component_needed)
+      continue;			/* skip components nobody asked for */
+    /* Map this component's block rows for the current output iMCU row. */
+    coef_rows = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[comp],
+       cinfo->output_iMCU_row * comp_info->v_samp_factor,
+       (JDIMENSION) comp_info->v_samp_factor, FALSE);
+    /* Normally every block row is real; the last iMCU row may hold fewer.
+     * (last_row_height is input-side-dependent, so it cannot be used here.)
+     */
+    rows_to_do = comp_info->v_samp_factor;
+    if (cinfo->output_iMCU_row >= final_iMCU_row) {
+      int leftover = (int) (comp_info->height_in_blocks % comp_info->v_samp_factor);
+      if (leftover != 0) rows_to_do = leftover;
+    }
+    idct = cinfo->idct->inverse_DCT[comp];
+    out_rows = output_buf[comp];
+    /* Transform each real block row, left to right. */
+    for (row = 0; row < rows_to_do; row++) {
+      cur_block = coef_rows[row];
+      out_col = 0;
+      for (blk = 0; blk < comp_info->width_in_blocks; blk++) {
+	(*idct) (cinfo, comp_info, (JCOEFPTR) cur_block++, out_rows, out_col);
+	out_col += comp_info->DCT_scaled_size;
+      }
+      out_rows += comp_info->DCT_scaled_size;
+    }
+  }
+
+  cinfo->output_iMCU_row++;
+  return (cinfo->output_iMCU_row < cinfo->total_iMCU_rows)
+	 ? JPEG_ROW_COMPLETED : JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order coefficients */
+#define Q01_POS  1	/* AC01 */
+#define Q10_POS  8	/* AC10 */
+#define Q20_POS  16	/* AC20 */
+#define Q11_POS  9	/* AC11 */
+#define Q02_POS  2	/* AC02 */
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;		/* current accuracy info for one component */
+  int * coef_bits_latch;	/* write cursor into the latch area */
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area (SAVED_COEFS ints per component) if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components *
+				  (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) {
+      coef_bits_latch[coefi] = coef_bits[coefi];	/* slot 0 is not written */
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;	/* advance to next component's slots */
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;
+  JBLOCK workspace;		/* modifiable copy of the block being smoothed */
+  int *coef_bits;		/* latched accuracy info for this component */
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;	/* 1-3 above, 4-6 this row, 7-9 below */
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      /* At the top/bottom image edge the current row stands in for the
+       * missing neighbor row. */
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr;
+      else
+	prev_block_row = buffer[block_row-1];
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr;
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values; in the last column the right neighbors stay as is */
+	if (block_num < last_block_column) {
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))	/* keep magnitude below 1<<Al */
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT on the modified copy, not the stored block */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column: slide each DC row one position left */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_scaled_size;
+      }
+      output_ptr += compptr->DCT_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;	/* smoothing_ok allocates it on first use */
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3;	/* prior, current and next iMCU rows */
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data;
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;	/* entry i -> i'th block of the chunk */
+    }
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/JPEG/jdcolor.cpp b/JPEG/jdcolor.cpp
new file mode 100644
index 0000000..6c04dfe
--- /dev/null
+++ b/JPEG/jdcolor.cpp
@@ -0,0 +1,396 @@
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields */
+
+  /* Private state for YCC->RGB conversion; filled by build_ycc_rgb_table */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => Cr to G table, left scaled by 2^SCALEBITS */
+  INT32 * Cb_g_tab;		/* => Cb to G table, scaled, ONE_HALF included */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;
+
+
+/**************** YCbCr -> RGB conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	R = Y                + 1.40200 * Cr
+ *	G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *	B = Y + 1.77200 * Cb
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5, scaled up */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* scale x up */
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int *cr_to_r, *cb_to_b;	/* entries already rounded to integers */
+  INT32 *cr_to_g, *cb_to_g;	/* entries still scaled by 2^SCALEBITS */
+  int sample;
+  SHIFT_TEMPS
+
+  /* One entry per possible sample value, allocated from the image pool. */
+  cconvert->Cr_r_tab = cr_to_r = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = cb_to_b = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = cr_to_g = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = cb_to_g = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (sample = 0; sample <= MAXJSAMPLE; sample++) {
+    /* The table index is the raw sample value (0..MAXJSAMPLE);
+     * the chroma value it stands for is that sample less CENTERJSAMPLE.
+     */
+    INT32 chroma = (INT32) sample - CENTERJSAMPLE;
+
+    /* Cr=>R: nearest integer to 1.40200 * chroma */
+    cr_to_r[sample] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * chroma + ONE_HALF, SCALEBITS);
+    /* Cb=>B: nearest integer to 1.77200 * chroma */
+    cb_to_b[sample] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * chroma + ONE_HALF, SCALEBITS);
+    /* Cr=>G: -0.71414 * chroma, kept scaled up */
+    cr_to_g[sample] = (- FIX(0.71414)) * chroma;
+    /* Cb=>G: -0.34414 * chroma, kept scaled up, plus ONE_HALF for rounding */
+    cb_to_g[sample] = (- FIX(0.34414)) * chroma + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int y, cb, cr;
+  JSAMPROW outptr;			/* interleaved output pixels */
+  JSAMPROW inptr0, inptr1, inptr2;	/* Y, Cb and Cr input rows */
+  JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* Local copies of the tables ('register' dropped: ill-formed in C++17) */
+  JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = cconvert->Cr_r_tab;
+  int * Cbbtab = cconvert->Cb_b_tab;
+  INT32 * Crgtab = cconvert->Cr_g_tab;
+  INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {		/* one output row per iteration */
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr += RGB_PIXELSIZE;		/* step to the next output pixel */
+    }
+  }
+}
+
+
+/**************** Cases other than YCbCr -> RGB **************/
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  JSAMPROW inptr, outptr;	/* no 'register': ill-formed in C++17 */
+  JDIMENSION count;
+  int num_components = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->output_width;
+  int ci;
+
+  while (--num_rows >= 0) {
+    for (ci = 0; ci < num_components; ci++) {
+      inptr = input_buf[ci][input_row];
+      outptr = output_buf[0] + ci;	/* component ci's slot in first pixel */
+      for (count = num_cols; count > 0; count--) {
+	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+	outptr += num_components;	/* same slot in the next pixel */
+      }
+    }
+    input_row++;
+    output_buf++;
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCbCr -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+		   JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  JSAMPARRAY src = input_buf[0];	/* only component 0 is copied */
+  jcopy_sample_rows(src, (int) input_row, output_buf, 0,
+		    num_rows, cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  JSAMPROW inptr, outptr;	/* no 'register': ill-formed in C++17 */
+  JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];	/* the single gray plane */
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr += RGB_PIXELSIZE;		/* step to the next output pixel */
+    }
+  }
+}
+
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int y, cb, cr;
+  JSAMPROW outptr;				/* interleaved output pixels */
+  JSAMPROW inptr0, inptr1, inptr2, inptr3;	/* Y, Cb, Cr and K input rows */
+  JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* Local copies of the tables ('register' dropped: ill-formed in C++17) */
+  JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = cconvert->Cr_r_tab;
+  int * Cbbtab = cconvert->Cb_b_tab;
+  INT32 * Crgtab = cconvert->Cr_g_tab;
+  INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {		/* one output row per iteration */
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 4;		/* four samples per output pixel */
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  (void) cinfo;			/* nothing to set up for a pass */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_deconverter));
+  cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also clear the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
+	cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);	/* tables that ycc_rgb_convert reads */
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub.color_convert = gray_rgb_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCCK) {
+      cconvert->pub.color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);	/* ycck_cmyk_convert reads the same tables */
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:
+    /* Permit null conversion to same output space */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub.color_convert = null_convert;
+    } else			/* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  /* output_components is what the caller receives per pixel */
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/JPEG/jdct.h b/JPEG/jdct.h
new file mode 100644
index 0000000..04192a2
--- /dev/null
+++ b/JPEG/jdct.h
@@ -0,0 +1,176 @@
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+ * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
+ * for 8-bit samples, INT32 for 12-bit samples.  (NOTE: Floating-point DCT
+ * implementations use an array of type FAST_FLOAT, instead.)
+ * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef int DCTELEM;		/* 8-bit samples: 16 or 32 bits is fine */
+#else
+typedef INT32 DCTELEM;		/* wider samples: must have 32 bits */
+#endif
+/* Method-pointer types for the in-place integer and float forward DCTs. */
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_scaled_size * DCT_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* islow: short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* ifast: 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits kept in ifast multipliers */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* ifast: need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits kept in ifast multipliers */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* float: preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
+/* Mask for the mask-and-table-lookup step; see the comment block above. */
+#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES	/* alias each long name to a short one */
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
+/* Inverse DCTs share one signature; jddctmgr picks NxN for DCT_scaled_size N. */
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)		/* 1 as INT32, so shifts use INT32 arithmetic */
+#define CONST_SCALE (ONE << CONST_BITS)	/* fixed-point value of 1.0 */
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE;
+ * adding 0.5 before the truncating cast rounds to nearest.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * The fudge factor added is half of 2**N.  We assume RIGHT_SHIFT rounds
+ * towards minus infinity, so this is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* 16x16 casts; may work if 'int' is 32 bits */
+#define MULTIPLY16C16(v,c)  (((INT16) (v)) * ((INT16) (c)))
+#endif
+#ifdef SHORTxLCONST_32		/* 16x32 casts; known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(v,c)  (((INT16) (v)) * ((INT32) (c)))
+#endif
+
+#ifndef MULTIPLY16C16		/* fallback: plain multiply, no casts */
+#define MULTIPLY16C16(v,c)  ((v) * (c))
+#endif
+
+/* Variable-times-variable flavour of the macro above. */
+
+#ifdef SHORTxSHORT_32		/* 16x16 casts; may work if 'int' is 32 bits */
+#define MULTIPLY16V16(a,b)  (((INT16) (a)) * ((INT16) (b)))
+#endif
+
+#ifndef MULTIPLY16V16		/* fallback: plain multiply, no casts */
+#define MULTIPLY16V16(a,b)  ((a) * (b))
+#endif
diff --git a/JPEG/jddctmgr.cpp b/JPEG/jddctmgr.cpp
new file mode 100644
index 0000000..bbf8d0e
--- /dev/null
+++ b/JPEG/jddctmgr.cpp
@@ -0,0 +1,269 @@
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields (start_pass, inverse_DCT[]) */
+
+  /* Per component: the JDCT_* method code its multiplier table is
+   * currently built for, or -1 if it has not been built yet.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr; /* cinfo->idct is cast to this */
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];	/* JDCT_ISLOW and scaled IDCTs */
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];	/* JDCT_IFAST */
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];	/* JDCT_FLOAT */
+#endif
+} multiplier_table;	/* sized with SIZEOF() in jinit_inverse_dct */
+
+
+/* ISLOW-style multiplier tables are consumed both by the ISLOW IDCT and
+ * by the current scaled-IDCT routines, so the code that builds them has
+ * to be compiled whenever either ISLOW or SCALING support is requested.
+ * PROVIDE_ISLOW_TABLES is the switch that enables that code.
+ */
+
+#if defined(DCT_ISLOW_SUPPORTED) || defined(IDCT_SCALING_SUPPORTED)
+#define PROVIDE_ISLOW_TABLES
+#endif
+
+
+
+/*
+ * Prepare for an output pass: for each component, select the IDCT routine
+ * for its DCT_scaled_size (and cinfo->dct_method at full size), then build
+ * a matching multiplier table unless a current one already exists.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;		/* JDCT_* code for the table layout method_ptr needs */
+  inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch (compptr->DCT_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case 1:
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 2:
+      method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 4:
+      method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+#endif
+    case DCTSIZE:
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:			/* requested method was not compiled in */
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:			/* only sizes 1, 2, 4 and DCTSIZE are handled */
+      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;	/* table below is built for this method */
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, but are stored as ints to ensure access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the multiplier table is to be scaled by
+	 * IFAST_SCALE_BITS.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col]);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr controller;
+  int comp;
+
+  /* Create the controller in the image pool and hook it into cinfo. */
+  controller = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_idct_controller));
+  cinfo->idct = (struct jpeg_inverse_dct *) controller;
+  controller->pub.start_pass = start_pass;
+
+  /* Each component gets a zeroed multiplier table, marked (-1) as not
+   * yet built for any IDCT method. */
+  for (comp = 0; comp < cinfo->num_components; comp++) {
+    jpeg_component_info *info = &cinfo->comp_info[comp];
+    info->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(multiplier_table));
+    MEMZERO(info->dct_table, SIZEOF(multiplier_table));
+    controller->cur_method[comp] = -1;
+  }
+}
diff --git a/JPEG/jdhuff.cpp b/JPEG/jdhuff.cpp
new file mode 100644
index 0000000..b5ba39f
--- /dev/null
+++ b/JPEG/jdhuff.cpp
@@ -0,0 +1,651 @@
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* DC prediction: last DC coef per scan component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else	/* NO_STRUCT_ASSIGN: copy last_dc_val[] one element at a time */
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif	/* MAX_COMPS_IN_SCAN == 4; no definition is provided otherwise */
+#endif	/* NO_STRUCT_ASSIGN */
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields (e.g. insufficient_data) */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Derived tables by table number (allocated on first use, image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  boolean dc_needed[D_MAX_BLOCKS_IN_MCU]; /* FALSE if component not needed */
+  boolean ac_needed[D_MAX_BLOCKS_IN_MCU]; /* also FALSE if DCT_scaled_size <= 1 */
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr; /* cinfo->entropy is cast to this */
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr dec = (huff_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * comp;
+  int scan_idx, blk;
+
+  /* Sequential JPEG expects Ss=0, Se=DCTSIZE2-1, Ah=Al=0.  Anything else
+   * ought to be an error, but some baseline files carry all zeroes in
+   * these bytes, so we only issue a warning.
+   */
+  if (! (cinfo->Ss == 0 && cinfo->Se == DCTSIZE2-1 &&
+	 cinfo->Ah == 0 && cinfo->Al == 0))
+    WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+  /* Per scan component: (re)derive its DC and AC tables and zero the DC
+   * prediction.  A table may be derived more than once; that is cheap.
+   */
+  scan_idx = 0;
+  while (scan_idx < cinfo->comps_in_scan) {
+    comp = cinfo->cur_comp_info[scan_idx];
+    jpeg_make_d_derived_tbl(cinfo, TRUE, comp->dc_tbl_no,
+			    & dec->dc_derived_tbls[comp->dc_tbl_no]);
+    jpeg_make_d_derived_tbl(cinfo, FALSE, comp->ac_tbl_no,
+			    & dec->ac_derived_tbls[comp->ac_tbl_no]);
+    dec->saved.last_dc_val[scan_idx] = 0;
+    scan_idx++;
+  }
+
+  /* Per block of an MCU: cache the tables to use and whether the
+   * coefficient values matter at all.
+   */
+  for (blk = 0; blk < cinfo->blocks_in_MCU; blk++) {
+    comp = cinfo->cur_comp_info[cinfo->MCU_membership[blk]];
+    dec->dc_cur_tbls[blk] = dec->dc_derived_tbls[comp->dc_tbl_no];
+    dec->ac_cur_tbls[blk] = dec->ac_derived_tbls[comp->ac_tbl_no];
+    /* Blocks of unneeded components have both flags cleared */
+    if (! comp->component_needed) {
+      dec->dc_needed[blk] = dec->ac_needed[blk] = FALSE;
+      continue;
+    }
+    dec->dc_needed[blk] = TRUE;
+    /* the ACs are not needed when producing a 1/8th-size image */
+    dec->ac_needed[blk] = (comp->DCT_scaled_size > 1);
+  }
+
+  /* Start with an empty bit buffer and no data shortage recorded */
+  dec->bitstate.bits_left = 0;
+  dec->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  dec->pub.insufficient_data = FALSE;
+
+  /* Arm the restart counter for the first restart interval */
+  dec->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Compute the derived decoding values for Huffman table number tblno
+ * (DC table if isDC, else AC) into *pdtbl, allocating *pdtbl from the
+ * image pool if it is NULL.  Missing or invalid tables are reported
+ * via ERREXIT.  Note this is also used by jdphuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];		/* code length of each symbol, 0-terminated */
+  unsigned int huffcode[257];	/* code assigned to each symbol */
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;		/* fill in back link */
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;		/* terminator tested by the loop below */
+  numsymbols = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+  
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+    }
+  }
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
+	dtbl->look_nbits[lookbits] = l;
+	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching (shared with jdphuff.c).
+ * See jdhuff.h for info about usage.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32		/* shift time grows with distance: keep it short */
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)	/* lets the full buffer width be used */
+#endif
+
+
+GLOBAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits; FALSE if no more input */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;		/* most recently read byte */
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! cinfo->entropy->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	cinfo->entropy->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Out-of-line Huffman code decoding (usage: see jdhuff.h).  Returns the
+ * symbol, or 0 after a warning when no code of up to 16 bits matches.
+ */
+
+GLOBAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int l = min_bits;	/* length in bits of the code read so far */
+  register INT32 code;		/* the code bits read so far */
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16 in the JPEG spec. */
+
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;			/* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+/* Computed form: adds 1 - 2**s, evaluated in long so nothing negative is shifted */
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (int) (1L - (1L<<(s))) : (x))
+
+#else
+/* Table form: s indexes the two tables below, so it must lie in 0..15 */
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+/* Offsets are written as literals: the equivalent (-1 << n) + 1 would
+ * left-shift a negative value, which is undefined behavior in C. */
+static const int extend_offset[16] = /* entry n is 1 - 2**n */
+  { 0, -1, -3, -7, -15, -31, -63, -127, -255,
+    -511, -1023, -2047, -4095, -8191, -16383, -32767 };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr huff = (huff_entropy_ptr) cinfo->entropy;
+  int comp = 0;
+
+  /* Whole bytes still held in the bit buffer are added to the marker
+   * reader's discarded-byte count; the leftover bits are dropped. */
+  cinfo->marker->discarded_bytes += huff->bitstate.bits_left / 8;
+  huff->bitstate.bits_left = 0;
+
+  /* Consume the RSTn marker; FALSE from the reader means suspend. */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* DC predictions start over at zero after a restart. */
+  while (comp < cinfo->comps_in_scan)
+    huff->saved.last_dc_val[comp++] = 0;
+
+  /* Begin counting down a fresh restart interval. */
+  huff->restarts_to_go = cinfo->restart_interval;
+
+  /* Clear the out-of-data flag, except when read_restart_marker stopped
+   * right at another marker.  The next data segment is then treated as
+   * empty, and leaving the flag set avoids producing bogus output
+   * pixels.
+   */
+  if (cinfo->unread_marker == 0)
+    huff->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Decode and return one MCU's worth of Huffman-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * Returns FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * this module, since we'll just re-assign them on the next call.)
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;		/* declares get_buffer, bits_left, br_state */
+  savable_state state;		/* working copy, committed only on success */
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state (local copies; permanent state is untouched) */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+      register int s, k, r;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+      if (s) {			/* s = number of extra bits that follow */
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);	/* s now holds the signed difference */
+      }
+
+      if (entropy->dc_needed[blkn]) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	int ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+	(*block)[0] = (JCOEF) s;
+      }
+
+      if (entropy->ac_needed[blkn]) {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+      
+	  r = s >> 4;		/* upper 4 bits: coefficients to skip */
+	  s &= 15;		/* lower 4 bits: bit count of the value */
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)	/* s == 0 and r != 15: stop decoding this block */
+	      break;
+	    k += 15;		/* r == 15: skip 15 here, the loop's k++ adds one more */
+	  }
+	}
+
+      } else {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* In this path we just discard the values */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+      
+	  r = s >> 4;		/* upper 4 bits: coefficients to skip */
+	  s &= 15;		/* lower 4 bits: bit count of the value */
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    DROP_BITS(s);	/* value bits are consumed but not stored */
+	  } else {
+	    if (r != 15)
+	      break;
+	    k += 15;
+	  }
+	}
+
+      }
+    }
+
+    /* Completed MCU, so update state (skipped by every return FALSE above) */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  int tbl;
+  /* Allocate the decoder object from the JPOOL_IMAGE pool. */
+  huff_entropy_ptr huff = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+
+  /* Install the object and its method pointers. */
+  huff->pub.decode_mcu = decode_mcu;
+  huff->pub.start_pass = start_pass_huff_decoder;
+  cinfo->entropy = (struct jpeg_entropy_decoder *) huff;
+
+  /* No derived tables exist yet: null out every DC and AC slot. */
+  for (tbl = 0; tbl < NUM_HUFF_TBLS; tbl++)
+    huff->ac_derived_tbls[tbl] = huff->dc_derived_tbls[tbl] = NULL;
+}
diff --git a/JPEG/jdhuff.h b/JPEG/jdhuff.h
new file mode 100644
index 0000000..ae19b6c
--- /dev/null
+++ b/JPEG/jdhuff.h
@@ -0,0 +1,201 @@
+/*
+ * jdhuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy decoding routines
+ * that are shared between the sequential decoder (jdhuff.c) and the
+ * progressive decoder (jdphuff.c).  No other modules need to see these.
+ */
+
+/* Short forms of external names, enabled by NEED_SHORT_EXTERNAL_NAMES. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_d_derived_tbl	jMkDDerived
+#define jpeg_fill_bit_buffer	jFilBitBuf
+#define jpeg_huff_decode	jHufDecode
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead; sizes the look_* tables */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* code length in bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol; unused when look_nbits is 0 */
+} d_derived_tbl;
+
+/* Expand a Huffman table definition into the derived format (d_derived_tbl) */
+EXTERN(void) jpeg_make_d_derived_tbl
+	JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
+	     d_derived_tbl ** pdtbl));
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits; keep in step with bit_buf_type */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in get_buffer */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.
+   */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in get_buffer */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Declare the locals get_buffer, bits_left and br_state used by the macros below. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+/* Load source position and bit buffer into the locals (expansion ends in ';'). */
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = cinfop; \
+	br_state.next_input_byte = cinfop->src->next_input_byte; \
+	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+	get_buffer = permstate.get_buffer; \
+	bits_left = permstate.bits_left;
+/* Store the locals back into the source manager and into permstate. */
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	cinfop->src->next_input_byte = br_state.next_input_byte; \
+	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	permstate.get_buffer = get_buffer; \
+	permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+/* Refill when short of bits: run `action` on failure, else reload the locals. */
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+/* Yields the next nbits bits as an int and subtracts nbits from bits_left. */
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
+/* Yields the same bits as GET_BITS would, leaving bits_left unchanged. */
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
+/* Skips nbits bits by lowering bits_left; get_buffer is not modified. */
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+/* Load up the bit buffer to a depth of at least nbits; FALSE means suspend */
+EXTERN(boolean) jpeg_fill_bit_buffer
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, int nbits));
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ * 4. slowlabel becomes a goto label, so each use in a function needs its own.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = htbl->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = htbl->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1; \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+  } \
+}
+
+/* Out-of-line case for Huffman code fetching; min_bits is the nb set by HUFF_DECODE */
+EXTERN(int) jpeg_huff_decode
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, d_derived_tbl * htbl, int min_bits));
diff --git a/JPEG/jdinput.cpp b/JPEG/jdinput.cpp
new file mode 100644
index 0000000..0c2ac8f
--- /dev/null
+++ b/JPEG/jdinput.cpp
@@ -0,0 +1,381 @@
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state of the input controller */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+  /* pub is the first member: this object is cast to and from the public type */
+  boolean inheaders;		/* TRUE until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr;
+
+
+/* Forward declaration: finish_input_pass uses consume_markers before its definition */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* Runs once, at the first SOS marker: validate frame, derive dimensions */
+{
+  int idx;
+  jpeg_component_info *comp;
+
+  /* Reject images whose dimensions exceed the supported maximum. */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Sample precision has to equal the compiled-in BITS_IN_JSAMPLE. */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* The component count must fit the fixed-size internal arrays. */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Validate each component's sampling factors and record the maxima. */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = cinfo->comp_info + idx;
+    if (comp->h_samp_factor <= 0 || comp->h_samp_factor > MAX_SAMP_FACTOR ||
+	comp->v_samp_factor <= 0 || comp->v_samp_factor > MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    if (comp->h_samp_factor > cinfo->max_h_samp_factor)
+      cinfo->max_h_samp_factor = comp->h_samp_factor;
+    if (comp->v_samp_factor > cinfo->max_v_samp_factor)
+      cinfo->max_v_samp_factor = comp->v_samp_factor;
+  }
+
+  /* DCT_scaled_size and min_DCT_scaled_size default to DCTSIZE here.  The
+   * full decompressor overrides them in jdmaster.c, but the transcoder
+   * never runs jdmaster.c, so the defaults must be set in this routine.
+   */
+  cinfo->min_DCT_scaled_size = DCTSIZE;
+
+  /* Derive the dimensions of every component. */
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    long h_scaled, v_scaled;
+
+    comp = cinfo->comp_info + idx;
+    h_scaled = (long) cinfo->image_width * (long) comp->h_samp_factor;
+    v_scaled = (long) cinfo->image_height * (long) comp->v_samp_factor;
+    comp->DCT_scaled_size = DCTSIZE;
+    /* Size in DCT blocks */
+    comp->width_in_blocks = (JDIMENSION)
+      jdiv_round_up(h_scaled, (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    comp->height_in_blocks = (JDIMENSION)
+      jdiv_round_up(v_scaled, (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* Size in samples.  jdmaster.c also overrides downsampled_width and
+     * downsampled_height for full decompression; the transcoder library
+     * ignores them, though the calling application might read them.
+     */
+    comp->downsampled_width = (JDIMENSION)
+      jdiv_round_up(h_scaled, (long) cinfo->max_h_samp_factor);
+    comp->downsampled_height = (JDIMENSION)
+      jdiv_round_up(v_scaled, (long) cinfo->max_v_samp_factor);
+    /* Every component is needed until color conversion says otherwise. */
+    comp->component_needed = TRUE;
+    /* No quantization table has been saved for this component yet. */
+    comp->quant_table = NULL;
+  }
+
+  /* Count the fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
+  /* More than one scan is expected when this scan omits some components
+   * or when the file is progressive.
+   */
+  cinfo->inputctl->has_multiple_scans =
+    (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    ? TRUE : FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Computes the per-scan geometry.  Relies on cinfo->comps_in_scan and */
+/* cinfo->cur_comp_info[], which were set from the SOS marker. */
+{
+  int scan_idx, nblocks, rem;
+  jpeg_component_info *comp;
+
+  if (cinfo->comps_in_scan != 1) {
+
+    /* Interleaved (multi-component) scan: the component count must be
+     * positive and no larger than MAX_COMPS_IN_SCAN */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+
+    /* Overall image size in MCUs, from the maximum sampling factors */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
+    /* The MCU membership array is filled in component by component */
+    cinfo->blocks_in_MCU = 0;
+
+    for (scan_idx = 0; scan_idx < cinfo->comps_in_scan; scan_idx++) {
+      comp = cinfo->cur_comp_info[scan_idx];
+      /* Sampling factors give # of blocks of component in each MCU */
+      comp->MCU_width = comp->h_samp_factor;
+      comp->MCU_height = comp->v_samp_factor;
+      comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+      comp->MCU_sample_width = comp->MCU_width * comp->DCT_scaled_size;
+      /* Non-dummy blocks in the last MCU column and row; a zero
+       * remainder means the full MCU width or height is present */
+      rem = (int) (comp->width_in_blocks % comp->MCU_width);
+      comp->last_col_width = (rem == 0) ? comp->MCU_width : rem;
+      rem = (int) (comp->height_in_blocks % comp->MCU_height);
+      comp->last_row_height = (rem == 0) ? comp->MCU_height : rem;
+      /* Append this component's blocks to the membership array */
+      nblocks = comp->MCU_blocks;
+      if (cinfo->blocks_in_MCU + nblocks > D_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      for (; nblocks > 0; nblocks--)
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = scan_idx;
+    }
+
+  } else {
+
+    /* Noninterleaved (single-component) scan */
+    comp = cinfo->cur_comp_info[0];
+
+    /* Overall image size in MCUs equals the component's size in blocks */
+    cinfo->MCUs_per_row = comp->width_in_blocks;
+    cinfo->MCU_rows_in_scan = comp->height_in_blocks;
+
+    /* For a noninterleaved scan, every MCU is exactly one block */
+    comp->MCU_width = 1;
+    comp->MCU_height = 1;
+    comp->MCU_blocks = 1;
+    comp->MCU_sample_width = comp->DCT_scaled_size;
+    comp->last_col_width = 1;
+    /* In the noninterleaved case last_row_height is defined, for
+     * convenience, as the number of block rows present in the last
+     * iMCU row.
+     */
+    rem = (int) (comp->height_in_blocks % comp->v_samp_factor);
+    comp->last_row_height = (rem == 0) ? comp->v_samp_factor : rem;
+
+    /* The MCU holds one block, belonging to scan component 0 */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  int scan_idx;
+
+  for (scan_idx = 0; scan_idx < cinfo->comps_in_scan; scan_idx++) {
+    jpeg_component_info *comp = cinfo->cur_comp_info[scan_idx];
+    /* Only components without a saved Q-table need any work */
+    if (comp->quant_table == NULL) {
+      JQUANT_TBL * saved;
+      int slot = comp->quant_tbl_no;
+
+      /* The referenced slot must be in range and actually defined */
+      if (slot < 0 || slot >= NUM_QUANT_TBLS ||
+	  cinfo->quant_tbl_ptrs[slot] == NULL)
+	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, slot);
+      /* Copy the table into JPOOL_IMAGE storage and attach it */
+      saved = (JQUANT_TBL *)
+	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(JQUANT_TBL));
+      MEMCOPY(saved, cinfo->quant_tbl_ptrs[slot], SIZEOF(JQUANT_TBL));
+      comp->quant_table = saved;
+    }
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);			/* MCU geometry for this scan */
+  latch_quant_tables(cinfo);			/* save Q-tables not yet latched */
+  (*cinfo->entropy->start_pass) (cinfo);	/* entropy decoder's start_pass */
+  (*cinfo->coef->start_input_pass) (cinfo);	/* coefficient controller's hook */
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data; /* input now goes to consume_data */
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->inputctl->consume_input = consume_markers; /* input goes back to marker reading */
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr ctl = (my_inputctl_ptr) cinfo->inputctl;
+  int status;
+
+  /* Once EOI has been seen, nothing further is read. */
+  if (ctl->pub.eoi_reached)
+    return JPEG_REACHED_EOI;
+
+  status = (*cinfo->marker->read_markers) (cinfo);
+
+  if (status == JPEG_REACHED_SOS) {
+    if (! ctl->inheaders) {
+      /* Second or later SOS: only legal in a multiple-scan file. */
+      if (! ctl->pub.has_multiple_scans)
+	ERREXIT(cinfo, JERR_EOI_EXPECTED);
+      start_input_pass(cinfo);
+    } else {
+      /* First SOS: finish header processing.  start_input_pass is not
+       * called here; jdmaster.c must call it before more input can be
+       * consumed, and jdapimin.c is responsible for that sequencing.
+       */
+      initial_setup(cinfo);
+      ctl->inheaders = FALSE;
+    }
+  } else if (status == JPEG_REACHED_EOI) {
+    ctl->pub.eoi_reached = TRUE;
+    if (! ctl->inheaders) {
+      /* Clamp output_scan_number to the number of scans actually read;
+       * otherwise the coef controller's decompress_data routine could
+       * loop forever when the user asked for a later scan.
+       */
+      if (cinfo->output_scan_number > cinfo->input_scan_number)
+	cinfo->output_scan_number = cinfo->input_scan_number;
+    } else if (cinfo->marker->saw_SOF) {
+      /* EOI while still in headers is a tables-only datastream, which
+       * must not contain a frame header. */
+      ERREXIT(cinfo, JERR_SOF_NO_SOS);
+    }
+  }
+  /* JPEG_SUSPENDED and any other status need no state change. */
+
+  return status;
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr ctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  ctl->inheaders = TRUE;
+  ctl->pub.eoi_reached = FALSE;
+  ctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  ctl->pub.consume_input = consume_markers;
+  /* Then reset the error manager and the marker reader, in that order. */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Progression state; it would be cleaner if the entropy decoder did this. */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  /* The controller lives in the permanent pool. */
+  my_inputctl_ptr ctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_input_controller));
+
+  cinfo->inputctl = (struct jpeg_input_controller *) ctl;
+  /* Method pointers */
+  ctl->pub.finish_input_pass = finish_input_pass;
+  ctl->pub.start_input_pass = start_input_pass;
+  ctl->pub.reset_input_controller = reset_input_controller;
+  ctl->pub.consume_input = consume_markers;
+
+  /* Initial state.  This duplicates part of reset_input_controller, which
+   * cannot be called here because it would also reset other modules.
+   */
+  ctl->inheaders = TRUE;
+  ctl->pub.eoi_reached = FALSE;
+  ctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+}
diff --git a/JPEG/jdmainct.cpp b/JPEG/jdmainct.cpp
new file mode 100644
index 0000000..13c956f
--- /dev/null
+++ b/JPEG/jdmainct.cpp
@@ -0,0 +1,512 @@
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
+ * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields; first, so casts work */
+
+  /* Per-component workspace: M row groups, or M+2 when context is needed. */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  /* Master pointers to the two funny-order lists; whichptr selects one. */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* 0 or 1: which pointer set is now in use */
+  int context_state;		/* process_data state machine status (CTX_xxx) */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+/* context_state values: where process_data_context_main resumes its work */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations: process_data methods chosen in start_pass_main */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Reserve memory for the two "funny" pointer lists of every component.
+ * Only the pointer arrays are allocated here; they are filled in later.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int M = cinfo->min_DCT_scaled_size;
+  int ncomp = cinfo->num_components;
+  int ci, rgroup, list_len;
+  JSAMPARRAY lists;
+
+  /* One allocation holds the per-component master arrays of both lists;
+   * xbuffer[1] starts right after the num_components entries of xbuffer[0].
+   */
+  mainp->xbuffer[0] = (JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                ncomp * 2 * SIZEOF(JSAMPARRAY));
+  mainp->xbuffer[1] = mainp->xbuffer[0] + ncomp;
+
+  for (ci = 0; ci < ncomp; ci++) {
+    jpeg_component_info *comp = cinfo->comp_info + ci;
+    /* rgroup = sample rows per row group for this component */
+    rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) / M;
+    /* Each list gets M+4 row groups of pointers; both lists come from a
+     * single allocation, laid out back to back.
+     */
+    list_len = rgroup * (M + 4);
+    lists = (JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                  2 * list_len * SIZEOF(JSAMPROW));
+    /* Skip one row group so that it is reachable at negative offsets */
+    mainp->xbuffer[0][ci] = lists + rgroup;
+    mainp->xbuffer[1][ci] = lists + rgroup + list_len;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in main->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * It is called from start_pass_main, so it is redone for each such pass.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_scaled_size;	/* row groups per iMCU row */
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main->xbuffer[0][ci];
+    xbuf1 = main->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is (all M+2 row groups) */
+    buf = main->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, row groups M-2,M-1 trade places with M,M+1 */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];	/* every "above" slot -> sample row 0 */
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Fill the spare row groups at both ends of each pointer list: the group
+ * at negative offsets aliases row group M+1, and the group at index M+2
+ * aliases row group 0.  Used to leave the top-of-image pointer state.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int M = cinfo->min_DCT_scaled_size;
+  int ci, which, row, rgroup;
+  JSAMPARRAY list;
+
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    jpeg_component_info *comp = cinfo->comp_info + ci;
+    /* rgroup = sample rows per row group for this component */
+    rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) / M;
+    for (which = 0; which < 2; which++) {
+      list = mainp->xbuffer[which][ci];
+      for (row = 0; row < rgroup; row++) {
+        /* above-context slot <- last row group; tail slot <- first one */
+        list[row - rgroup] = list[rgroup*(M+1) + row];
+        list[rgroup*(M+2) + row] = list[row];
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;	/* last iMCU row is full */
+    /* Count nondummy row groups (rows_left / rgroup, rounded up).  Should get
+     * same answer for each component, so we need only do it once.
+     */
+    if (ci == 0) {
+      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = main->xbuffer[main->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1];	/* alias, no sample copy */
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* Final pass of 2-pass quantization: only the postprocessor runs */
+    mainp->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  case JBUF_PASS_THRU:
+    /* Start the pass with an empty buffer and no row groups emitted */
+    mainp->rowgroup_ctr = 0;
+    mainp->buffer_full = FALSE;
+    if (! cinfo->upsample->need_context_rows) {
+      /* No context rows wanted: plain iMCU-row-at-a-time delivery */
+      mainp->pub.process_data = process_data_simple_main;
+      break;
+    }
+    mainp->pub.process_data = process_data_context_main;
+    mainp->iMCU_row_ctr = 0;
+    mainp->context_state = CTX_PREPARE_FOR_IMCU;
+    mainp->whichptr = 0;        /* first iMCU row is read via xbuffer[0] */
+    make_funny_pointers(cinfo); /* rebuild the xbuffer[] lists */
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo,
+                          JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                          JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  JDIMENSION groups_in_row;
+
+  /* An empty buffer must be refilled with one iMCU row before going on. */
+  if (! mainp->buffer_full) {
+    boolean got_row = (*cinfo->coef->decompress_data) (cinfo, mainp->buffer);
+    if (! got_row)
+      return;                   /* decoder suspended; try again later */
+    mainp->buffer_full = TRUE;
+  }
+
+  /* Every iMCU row holds exactly min_DCT_scaled_size row groups. */
+  groups_in_row = (JDIMENSION) cinfo->min_DCT_scaled_size;
+  /* Hand the buffer to the postprocessor, which advances rowgroup_ctr.
+   * No bottom-of-image trimming happens here: padding row groups in the
+   * last iMCU row are passed along too, because the postprocessor has to
+   * check for the image bottom at row resolution anyway.
+   */
+  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
+                                     &mainp->rowgroup_ctr, groups_in_row,
+                                     output_buf, out_row_ctr, out_rows_avail);
+
+  /* Once every row group has been taken, the buffer counts as empty. */
+  if (mainp->rowgroup_ctr >= groups_in_row) {
+    mainp->rowgroup_ctr = 0;
+    mainp->buffer_full = FALSE;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo,
+			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! main->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   main->xbuffer[main->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    main->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (main->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend; state stays POSTPONED_ROW */
+    main->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    main->rowgroup_ctr = 0;
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    main->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend; state stays PROCESS_IMCU */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (main->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    main->buffer_full = FALSE;
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
+    main->context_state = CTX_POSTPONED_ROW;	/* handled on the next call */
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+                         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  /* The postprocessor reads its own buffer, so no input is passed in. */
+  (*cinfo->post->post_process_data)
+    (cinfo, (JSAMPIMAGE) NULL, (JDIMENSION *) NULL, (JDIMENSION) 0,
+     output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  int M = cinfo->min_DCT_scaled_size;
+  int ci, ngroups;
+  my_main_ptr mainp;
+
+  /* Create the controller object and hook it into cinfo */
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                SIZEOF(my_main_controller));
+  mainp->pub.start_pass = start_pass_main;
+  cinfo->main = (struct jpeg_d_main_controller *) mainp;
+
+  /* A full-image main buffer is never expected by this controller */
+  if (need_full_buffer)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Work out how many row groups of workspace each component needs:
+   * M without context rows, M+2 with them.
+   */
+  ngroups = M;
+  if (cinfo->upsample->need_context_rows) {
+    if (M < 2)                  /* context rows need at least 2 row groups */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* room for the xbuffer[] pointer lists */
+    ngroups += 2;
+  }
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    jpeg_component_info *comp = cinfo->comp_info + ci;
+    /* rgroup = sample rows per row group for this component */
+    int rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) / M;
+    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       comp->width_in_blocks * comp->DCT_scaled_size,
+       (JDIMENSION) (rgroup * ngroups));
+  }
+}
diff --git a/JPEG/jdmarker.cpp b/JPEG/jdmarker.cpp
new file mode 100644
index 0000000..f4cca8c
--- /dev/null
+++ b/JPEG/jdmarker.cpp
@@ -0,0 +1,1360 @@
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes, grouped by kind */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+  
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  
+  M_DHT   = 0xc4,
+  
+  M_DAC   = 0xcc,
+  
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+  
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  
+  M_TEM   = 0x01,
+  
+  M_ERROR = 0x100		/* the only value here that exceeds one byte */
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];	/* presumably APP0..APP15 */
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];	/* same indexing as process_APPn? */
+
+  /* Status of COM/APPn marker saving */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare the locals (datasrc, next_input_byte, bytes_in_buffer) used below */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Write the local copies back --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * If the buffer is empty, fill_input_buffer is called without a prior
+ * INPUT_SYNC; "action" runs if it fails, else the local copies are reloaded.
+ */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes as an unsigned 16-bit integer, high byte first.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Handle SOI: reject a duplicate, then restore per-image defaults */
+{
+  int tbl = 0;
+
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* State that an SOI marker is defined to reset */
+  cinfo->restart_interval = 0;
+  while (tbl < NUM_ARITH_TBLS) {
+    cinfo->arith_ac_K[tbl] = 5;
+    cinfo->arith_dc_U[tbl] = 1;
+    cinfo->arith_dc_L[tbl] = 0;
+    tbl++;
+  }
+
+  /* Nothing is known about the colorspace or sampling yet */
+  cinfo->CCIR601_sampling = FALSE;
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+
+  /* No Adobe marker seen so far */
+  cinfo->Adobe_transform = 0;
+  cinfo->saw_Adobe_marker = FALSE;
+
+  /* No JFIF marker seen so far; these are the default JFIF APP0 values */
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->marker->saw_SOI = TRUE;
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
+/* Process a SOFn marker; returns FALSE if input ran out (caller retries) */
+{
+  INT32 length;
+  int c, ci;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  cinfo->progressive_mode = is_prog;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;			/* length field (2) + the 6 bytes just read */
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3))	/* 3 bytes per component */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 cinfo->num_components * SIZEOF(jpeg_component_info));
+  
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, compptr->component_id, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;	/* high nibble */
+    compptr->v_samp_factor = (c     ) & 15;	/* low nibble */
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     compptr->component_id, compptr->h_samp_factor,
+	     compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);		/* whole marker consumed: move restart point */
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker; returns FALSE if input ran out (caller retries) */
+{
+  INT32 length;
+  int i, ci, n, c, cc;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOS_NO_SOF);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  /* 6 = length field (2) + count byte (1) + Ss, Se, Ah/Al (3); 2 per comp */
+  if (length != (n * 2 + 6) || n < 1 || n > MAX_COMPS_IN_SCAN)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters */
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, cc, return FALSE);	/* component id */
+    INPUT_BYTE(cinfo, c, return FALSE);	/* DC/AC table selectors */
+    
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (cc == compptr->component_id)
+	goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+
+  id_found:			/* compptr is the component whose id is cc */
+
+    cinfo->cur_comp_info[i] = compptr;
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+    
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another SOS marker */
+  cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);		/* whole marker consumed: move restart point */
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker; returns FALSE if input ran out (caller retries) */
+{
+  INT32 length;
+  int index, val;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;			/* the two length bytes just read */
+  
+  while (length > 0) {		/* one (index, value) byte pair per pass */
+    INPUT_BYTE(cinfo, index, return FALSE);
+    INPUT_BYTE(cinfo, val, return FALSE);
+
+    length -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
+
+    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
+
+    if (index >= NUM_ARITH_TBLS) { /* define AC table */
+      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
+    } else {			/* define DC table */
+      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
+      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
+      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+    }
+  }
+
+  if (length != 0)		/* goes negative if the payload size was odd */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);		/* whole marker consumed: move restart point */
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker.
+ *
+ * A DHT segment holds one or more Huffman table definitions.  Each one is
+ * a table-index byte (bit 0x10 set selects an AC table, clear selects a DC
+ * table), 16 code-length counts, and then that many symbol values.  Each
+ * definition is copied into cinfo->ac_huff_tbl_ptrs[] or
+ * cinfo->dc_huff_tbl_ptrs[], allocating the table on first use.
+ *
+ * Returns FALSE if more input is needed, TRUE once the segment is consumed.
+ */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  int is_ac;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++) {
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+    }
+    /* Clear the unused tail so that the full-size copy below never moves
+     * uninitialized stack bytes into the saved table.
+     */
+    for (; i < 256; i++)
+      huffval[i] = 0;
+
+    length -= count;
+
+    /* Validate the table number before forming the address of the slot */
+    is_ac = (index & 0x10) != 0;
+    if (is_ac)
+      index -= 0x10;
+
+    if (index < 0 || index >= NUM_HUFF_TBLS)
+      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+
+    if (is_ac)			/* AC table definition */
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    else			/* DC table definition */
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+  }
+
+  /* Anything left over means the segment length was inconsistent */
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker.
+ * Each table in the segment starts with a byte whose high nibble is the
+ * precision flag (nonzero means 2-byte entries) and whose low nibble is the
+ * table number, followed by DCTSIZE2 entries in zigzag order.
+ * Returns FALSE if more input is needed, TRUE once the segment is consumed.
+ */
+{
+  INT32 length;
+  int tblno, k, prec;
+  unsigned int qval;
+  JQUANT_TBL *tbl;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    INPUT_BYTE(cinfo, tblno, return FALSE);
+    prec = tblno >> 4;
+    tblno &= 0x0F;
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, tblno, prec);
+
+    if (tblno >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, tblno);
+
+    /* Allocate the table slot the first time it is defined */
+    if (cinfo->quant_tbl_ptrs[tblno] == NULL)
+      cinfo->quant_tbl_ptrs[tblno] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    tbl = cinfo->quant_tbl_ptrs[tblno];
+
+    for (k = 0; k < DCTSIZE2; k++) {
+      if (prec) {
+        INPUT_2BYTES(cinfo, qval, return FALSE);
+      } else {
+        INPUT_BYTE(cinfo, qval, return FALSE);
+      }
+      /* Entries arrive in zigzag order; store them in natural order */
+      tbl->quantval[jpeg_natural_order[k]] = (UINT16) qval;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      /* Dump the table eight values per message */
+      for (k = 0; k < DCTSIZE2; k += 8) {
+        UINT16 *row = tbl->quantval + k;
+        TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+                 row[0], row[1], row[2], row[3],
+                 row[4], row[5], row[6], row[7]);
+      }
+    }
+
+    /* One selector byte plus DCTSIZE2 entries of one or two bytes each */
+    length -= prec ? (DCTSIZE2 + 1 + DCTSIZE2) : (DCTSIZE2 + 1);
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker: a fixed 4-byte segment whose 2-byte payload is
+ * stored in cinfo->restart_interval.
+ * Returns FALSE if more input is needed, TRUE otherwise.
+ */
+{
+  INT32 seglen;
+  unsigned int interval;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, seglen, return FALSE);
+
+  if (seglen != 4) {
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+  }
+
+  INPUT_2BYTES(cinfo, interval, return FALSE);
+
+  TRACEMS1(cinfo, 1, JTRC_DRI, interval);
+
+  cinfo->restart_interval = interval;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+/* Byte counts of the leading marker data that the examine_appN routines
+ * look at.  APPN_DATA_LEN sizes the local buffer in get_interesting_appn,
+ * so it has to cover both of the others.
+ */
+#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
+#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
+#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Inspect the leading bytes of an APP0 marker.
+ * data[] holds datalen bytes; remaining is the count of marker bytes that
+ * follow them.  A "JFIF\0" header has its version and density fields
+ * recorded in cinfo; a "JFXX\0" header only produces a trace message;
+ * anything else is traced as a generic APP0.
+ */
+{
+  INT32 totallen = (INT32) datalen + remaining;
+
+  if (datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x49 &&
+      GETJOCTET(data[3]) == 0x46 &&
+      GETJOCTET(data[4]) == 0) {
+    /* "JFIF\0": standard JFIF header */
+    int thumb_w = GETJOCTET(data[12]);
+    int thumb_h = GETJOCTET(data[13]);
+
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+
+    /* Only major version 1 is expected.  A mismatch is reported as a
+     * warning rather than an error; any minor version is accepted.
+     */
+    if (cinfo->JFIF_major_version != 1) {
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+              cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    }
+
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+             cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+             cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+
+    /* Mention the thumbnail if either dimension is nonzero */
+    if (thumb_w != 0 || thumb_h != 0) {
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, thumb_w, thumb_h);
+    }
+
+    /* What follows the header should be exactly width*height*3 bytes */
+    totallen -= APP0_DATA_LEN;
+    if (totallen != ((INT32) thumb_w * (INT32) thumb_h * (INT32) 3)) {
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
+    }
+    return;
+  }
+
+  if (datalen >= 6 &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x58 &&
+      GETJOCTET(data[3]) == 0x58 &&
+      GETJOCTET(data[4]) == 0) {
+    /* "JFXX\0": JFIF extension marker.  Nothing is stored for these;
+     * the extension code just selects which trace message to emit.
+     */
+    int ext_code = GETJOCTET(data[5]);
+
+    if (ext_code == 0x10) {
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
+    } else if (ext_code == 0x11) {
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
+    } else if (ext_code == 0x13) {
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
+    } else {
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, ext_code, (int) totallen);
+    }
+    return;
+  }
+
+  /* Neither "JFIF" nor "JFXX", or too short to tell */
+  TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Inspect the leading bytes of an APP14 marker.
+ * data[] holds datalen bytes; remaining is the count of marker bytes that
+ * follow them.  If the data starts with "Adobe" and is long enough, the
+ * transform byte is recorded in cinfo; otherwise only a trace is emitted.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+  int is_adobe;
+
+  is_adobe = datalen >= APP14_DATA_LEN &&
+             GETJOCTET(data[0]) == 0x41 &&	/* 'A' */
+             GETJOCTET(data[1]) == 0x64 &&	/* 'd' */
+             GETJOCTET(data[2]) == 0x6F &&	/* 'o' */
+             GETJOCTET(data[3]) == 0x62 &&	/* 'b' */
+             GETJOCTET(data[4]) == 0x65;	/* 'e' */
+
+  if (! is_adobe) {
+    /* Not an Adobe marker, or too short to tell */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+    return;
+  }
+
+  /* Three big-endian 16-bit fields followed by the transform byte */
+  version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+  flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+  flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+  transform = GETJOCTET(data[11]);
+  TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+  cinfo->saw_Adobe_marker = TRUE;
+  cinfo->Adobe_transform = (UINT8) transform;
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Handle an APP0 or APP14 marker that is examined but not saved.
+ * Up to APPN_DATA_LEN leading bytes are buffered locally and handed to
+ * examine_app0/examine_app14; the rest of the segment is skipped.
+ * Returns FALSE if more input is needed, TRUE otherwise.
+ */
+{
+  INT32 length;
+  JOCTET b[APPN_DATA_LEN];
+  unsigned int i, numtoread;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* Buffer min(length, APPN_DATA_LEN) bytes; nothing if length is bogus */
+  numtoread = 0;
+  if (length > 0) {
+    if (length >= APPN_DATA_LEN)
+      numtoread = APPN_DATA_LEN;
+    else
+      numtoread = (unsigned int) length;
+  }
+  i = 0;
+  while (i < numtoread) {
+    INPUT_BYTE(cinfo, b[i], return FALSE);
+    i++;
+  }
+  length -= numtoread;
+
+  /* Hand the buffered bytes to the matching examiner */
+  if (cinfo->unread_marker == (int) M_APP0) {
+    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
+  } else if (cinfo->unread_marker == (int) M_APP14) {
+    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
+  } else {
+    /* Only reachable if this routine was installed for the wrong marker */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  }
+
+  /* Commit the input position first, then skip the unread remainder */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list.
+ *
+ * On the first call for a marker (marker->cur_marker == NULL) the length
+ * word is read and a jpeg_marker_struct plus data area is allocated from
+ * JPOOL_IMAGE; at most the configured length limit is kept.  Progress is
+ * recorded in marker->cur_marker and marker->bytes_read so that a call
+ * which returns FALSE for lack of input can be resumed later.  Once the
+ * wanted bytes are in, the item is appended to cinfo->marker_list, APP0 and
+ * APP14 are examined, and any unsaved remainder of the segment is skipped.
+ */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      /* never keep more than the segment actually contains */
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      /* remember the in-progress item so a suspended call can resume */
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Discard a variable-length marker segment that nobody is interested in.
+ * Returns FALSE if the length word is not yet available, TRUE otherwise.
+ */
+{
+  INT32 payload;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, payload, return FALSE);
+  payload -= 2;			/* length word counts itself */
+
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) payload);
+
+  /* The input position has to be committed before skip_input_data runs */
+  INPUT_SYNC(cinfo);
+  if (payload > 0) {
+    (*cinfo->src->skip_input_data) (cinfo, (long) payload);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  /* Each pass of this loop looks for FF followed by a non-FF byte;
+   * it repeats only when that byte turns out to be a stuffed zero.
+   */
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  /* Report skipped garbage once per marker, then restart the count */
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  /* Publish the marker code for the caller to dispatch on */
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Variant of next_marker used to fetch the initial SOI marker.
+ * The very first two bytes must be FF followed by the SOI code; no leading
+ * garbage or fill bytes are tolerated, since otherwise an entire non-JPEG
+ * file might be scanned before giving up.  Applications handling non-JFIF
+ * files have to position the input at the SOI themselves.
+ * Returns FALSE if the two bytes are not yet available.
+ */
+{
+  int lead, code;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, lead, return FALSE);
+  INPUT_BYTE(cinfo, code, return FALSE);
+  if (! (lead == 0xFF && code == (int) M_SOI)) {
+    ERREXIT2(cinfo, JERR_NO_SOI, lead, code);
+  }
+
+  cinfo->unread_marker = code;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* The two flags passed to get_sof are, in order, "progressive" and
+     * "arithmetic coding", as the per-case comments below indicate.
+     */
+    case M_SOF0:		/* Baseline */
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      if (! get_sof(cinfo, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    /* SOS and EOI end this call: the pending marker is cleared here
+     * because these cases return instead of reaching the bottom of the loop.
+     */
+    case M_SOS:
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+    
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+      
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      /* Dispatch through the per-marker handler table; the slot is the
+       * marker's offset from APP0.
+       */
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+/* Consume the restart marker expected at this point, or ask the data
+ * source's resync_to_restart method to recover if a different marker is
+ * found.  Returns FALSE if more input is needed.
+ */
+{
+  int expected;
+
+  /* Fetch a marker unless one is already pending. */
+  /* next_marker itself warns if it has to skip any data. */
+  if (cinfo->unread_marker == 0 && ! next_marker(cinfo))
+    return FALSE;
+
+  expected = (int) M_RST0 + cinfo->marker->next_restart_num;
+
+  if (cinfo->unread_marker != expected) {
+    /* Wrong marker: the data source manager decides how to resync. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+                                            cinfo->marker->next_restart_num))
+      return FALSE;
+  } else {
+    /* Expected marker: swallow it so the entropy decoder can carry on. */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  }
+
+  /* Restart numbers cycle through 0..7 */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+/* Default resync method.  Starting from the marker in cinfo->unread_marker,
+ * repeatedly choose one of three actions:
+ *   1 = drop the marker and let the entropy decoder continue,
+ *   2 = scan forward to the next marker and decide again,
+ *   3 = leave the marker pending and return.
+ * Returns FALSE if scanning forward ran out of input.
+ */
+{
+  int marker = cinfo->unread_marker;
+  int action = 1;
+
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+
+  /* Each pass re-evaluates the most recently found marker. */
+  for (;;) {
+    if (marker < (int) M_SOF0) {
+      action = 2;		/* invalid marker */
+    } else if (marker < (int) M_RST0 || marker > (int) M_RST7) {
+      action = 3;		/* valid non-restart marker */
+    } else {
+      /* Distance from the desired restart number, modulo 8 */
+      int ahead = ((marker - (int) M_RST0) - desired) & 7;
+
+      if (ahead == 1 || ahead == 2)
+        action = 3;		/* one of the next two expected restarts */
+      else if (ahead == 7 || ahead == 6)
+        action = 2;		/* a prior restart, so advance */
+      else
+        action = 1;		/* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+
+    if (action == 1) {
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    }
+    if (action == 3) {
+      /* Return without advancing past this marker. */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+    if (action == 2) {
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (! next_marker(cinfo))
+        return FALSE;
+      marker = cinfo->unread_marker;
+    }
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+/* Put both the public decompressor fields and the reader's private state
+ * back to "nothing read yet".
+ */
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+
+  /* Private reader state */
+  reader->cur_marker = NULL;		/* no partially saved marker */
+  reader->pub.discarded_bytes = 0;
+  reader->pub.saw_SOF = FALSE;
+  reader->pub.saw_SOI = FALSE;
+
+  /* Decompressor-level state */
+  cinfo->unread_marker = 0;		/* no pending marker */
+  cinfo->input_scan_number = 0;		/* no SOS seen yet */
+  cinfo->comp_info = NULL;		/* until allocated by get_sof */
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+/* Allocate the marker reader from the permanent pool, install its method
+ * pointers and default COM/APPn handlers, and reset its state.
+ */
+{
+  my_marker_ptr reader;
+  int appn;
+
+  reader = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+                                SIZEOF(my_marker_reader));
+  cinfo->marker = (struct jpeg_marker_reader *) reader;
+
+  /* Public entry points */
+  reader->pub.reset_marker_reader = reset_marker_reader;
+  reader->pub.read_markers = read_markers;
+  reader->pub.read_restart_marker = read_restart_marker;
+
+  /* Default handlers: APP0 and APP14 are examined and then discarded;
+   * COM and every other APPn are discarded outright.  Nothing is saved.
+   */
+  reader->process_COM = skip_variable;
+  reader->length_limit_COM = 0;
+  for (appn = 0; appn < 16; appn++) {
+    if (appn == 0 || appn == 14)
+      reader->process_APPn[appn] = get_interesting_appn;
+    else
+      reader->process_APPn[appn] = skip_variable;
+    reader->length_limit_APPn[appn] = 0;
+  }
+
+  /* Start from a clean state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+/* Choose the handler and saved-length limit for COM or one APPn marker.
+ * A zero length_limit means "do not save"; a nonzero one installs
+ * save_marker with that limit.  Any other marker_code is an error.
+ */
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+  long maxlength;
+  jpeg_marker_parser_method handler;
+
+  /* Clamp the limit to what a single allocation can hold
+   * (should only matter in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > maxlength)
+    length_limit = (unsigned int) maxlength;
+
+  if (length_limit == 0) {
+    /* Not saving: APP0/APP14 still get examined on the fly */
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      handler = get_interesting_appn;
+    else
+      handler = skip_variable;
+  } else {
+    handler = save_marker;
+    /* When saving APP0/APP14, keep at least what the library itself reads */
+    if (marker_code == (int) M_APP0) {
+      if (length_limit < APP0_DATA_LEN)
+        length_limit = APP0_DATA_LEN;
+    } else if (marker_code == (int) M_APP14) {
+      if (length_limit < APP14_DATA_LEN)
+        length_limit = APP14_DATA_LEN;
+    }
+  }
+
+  if (marker_code == (int) M_COM) {
+    reader->process_COM = handler;
+    reader->length_limit_COM = length_limit;
+  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    int slot = marker_code - (int) M_APP0;
+    reader->process_APPn[slot] = handler;
+    reader->length_limit_APPn[slot] = length_limit;
+  } else {
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+  }
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+/* Install `routine` as the handler for COM or for one APPn marker.
+ * Any other marker_code is reported as JERR_UNKNOWN_MARKER.
+ */
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    reader->process_APPn[marker_code - (int) M_APP0] = routine;
+  } else if (marker_code == (int) M_COM) {
+    reader->process_COM = routine;
+  } else {
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+  }
+}
diff --git a/JPEG/jdmaster.cpp b/JPEG/jdmaster.cpp
new file mode 100644
index 0000000..2802c5b
--- /dev/null
+++ b/JPEG/jdmaster.cpp
@@ -0,0 +1,557 @@
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields; first, so cinfo->master casts work */
+
+  int pass_number;		/* # of passes completed; bumped in finish_output_pass */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules, in case we need
+   * to switch modes; master_selection leaves an unused one set to NULL.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr; /* what cinfo->master is cast to in this file */
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{ /* TRUE only if every test below passes; always FALSE if merging is compiled out. */
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Merging is the equivalent of plain box-filter upsampling */
+  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
+    return FALSE;
+  /* jdmerge.c only supports YCC=>RGB color conversion */
+  if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
+      cinfo->out_color_space != JCS_RGB ||
+      cinfo->out_color_components != RGB_PIXELSIZE)
+    return FALSE;
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 || /* component 0 may be 1v or 2v, not more */
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
+      cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
+      cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;			/* by golly, it'll work... */
+#else
+  return FALSE; /* UPSAMPLE_MERGING_SUPPORTED not defined: never merge */
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{ /* Fills in the output_*, out_color_components and rec_outbuf_height fields of cinfo. */
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+#endif
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* Compute actual output image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
+    /* Provide 1/8 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 8L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 8L);
+    cinfo->min_DCT_scaled_size = 1;
+  } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
+    /* Provide 1/4 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 4L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 4L);
+    cinfo->min_DCT_scaled_size = 2;
+  } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
+    /* Provide 1/2 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 2L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 2L);
+    cinfo->min_DCT_scaled_size = 4;
+  } else {
+    /* Provide 1/1 scaling */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    cinfo->min_DCT_scaled_size = DCTSIZE;
+  }
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note this code assumes that the supported DCT scalings are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = cinfo->min_DCT_scaled_size;
+    while (ssize < DCTSIZE &&
+	   (compptr->h_samp_factor * ssize * 2 <=
+	    cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
+	   (compptr->v_samp_factor * ssize * 2 <=
+	    cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_scaled_size = ssize; /* min size doubled 0+ times; no doubling once >= DCTSIZE */
+  }
+
+  /* Recompute downsampled dimensions of components;
+   * application needs to know these if using raw downsampled data.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+#endif /* else share code with YCbCr */
+  case JCS_YCbCr:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+    cinfo->out_color_components = 4;
+    break;
+  default:			/* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 : /* 1 when quantizing colors */
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
+ * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
+ * limiting step (just after the IDCT), a wildly out-of-range value is 
+ * possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = range_limit[x & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * So the post-IDCT limiting table ends up looking like this:
+ *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
+ *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0,1,...,CENTERJSAMPLE-1
+ * Negative inputs select values from the upper half of the table after
+ * masking.
+ *
+ * We can save some space by overlapping the start of the post-IDCT table
+ * with the simpler range limiting table.  The post-IDCT table begins at
+ * sample_range_limit + CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{ /* Table comes from JPOOL_IMAGE; cinfo->sample_range_limit points at its index-0 entry. */
+  JSAMPLE * table;
+  int i;
+
+  table = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+  cinfo->sample_range_limit = table;
+  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  /* Main part of "simple" table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  /* End of simple table, rest of first half of post-IDCT table */
+  for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
+    table[i] = MAXJSAMPLE;
+  /* Second half of post-IDCT table: a run of zeroes, then 0..CENTERJSAMPLE-1 */
+  MEMZERO(table + (2 * (MAXJSAMPLE+1)),
+	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
+	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE)); /* reuses limit[x] = x entries */
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{ /* Called from jinit_master_decompress once the master object is allocated. */
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow) /* cast changed the value => too wide */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) { /* force 1-pass and drop any supplied colormap */
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL); /* arithmetic-coded input is always rejected here */
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2); /* input pass + 1 or 2 output passes */
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{ /* Installed as master->pub.prepare_for_output_pass by jinit_master_decompress. */
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) { /* flag left TRUE by the preceding dummy pass */
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) { /* no colormap present yet */
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE;
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE); /* requested method was not enabled */
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample) /* merged upsampler does its own conversion */
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+				    (master->pub.is_dummy_pass ? 2 : 1); /* dummy pass + final pass, or one pass */
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{ /* Installed as master->pub.finish_output_pass by jinit_master_decompress. */
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors) /* the quantizer is only notified when quantizing */
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++; /* reported as completed_passes by prepare_for_output_pass */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{ /* Needs DSTATE_BUFIMAGE, quantize_colors, enable_external_quant and a non-NULL colormap. */
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else
+    ERREXIT(cinfo, JERR_MODE_CHANGE); /* external-colormap mode was not enabled or no map given */
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{ /* Allocates the master object, installs its two methods, then runs master_selection. */
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_decomp_master));
+  cinfo->master = (struct jpeg_decomp_master *) master;
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE; /* prepare_for_output_pass tests this on its first call */
+
+  master_selection(cinfo); /* initializes the remaining private fields */
+}
diff --git a/JPEG/jdmerge.cpp b/JPEG/jdmerge.cpp
new file mode 100644
index 0000000..3744446
--- /dev/null
+++ b/JPEG/jdmerge.cpp
@@ -0,0 +1,400 @@
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCbCr => RGB color conversion only.
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields; first, so cinfo->upsample casts work */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion; filled by build_ycc_rgb_table */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;		/* NULL when jinit_merged_upsampler picks the 1v case */
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr; /* what cinfo->upsample is cast to in this file */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5 scaled by 2^SCALEBITS */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* x * 2^SCALEBITS + 0.5, cast to INT32 */
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{ /* Allocates four (MAXJSAMPLE+1)-entry tables from JPOOL_IMAGE and fills them. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    upsample->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    upsample->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x (left unshifted; users shift the sum) */
+    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{ /* Installed as upsample->pub.start_pass by jinit_merged_upsampler. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height; /* counted down by merged_2v_upsample */
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail, /* not referenced in this routine */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+		      1, upsample->out_row_width);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr; /* rows still free after those already filled */
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row; /* second row goes to the spare buffer */
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail, /* not referenced in this routine */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail) /* not referenced in this routine */
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+			 output_buf + *out_row_ctr);
+  /* Adjust counts: one output row per input row group; rows_to_go is untouched */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{ /* Writes one output row; each Cb/Cr pair is shared by two adjacent pixels. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); /* Cbgtab already holds ONE_HALF */
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0); /* only one Y sample is read and one pixel written */
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{ /* Writes two output rows; each Cb/Cr pair is shared by a 2x2 group of pixels. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2]; /* two component-0 rows per row group */
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); /* Cbgtab already holds ONE_HALF */
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00); /* one pixel in the upper row ... */
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01); /* ... and one in the lower row */
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{ /* Allocates the upsampler object, picks the 2v or 1v routines, builds the tables. */
+  my_upsample_ptr upsample;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor == 2) { /* 2h2v; any other value takes the 2h1v path */
+    upsample->pub.upsample = merged_2v_upsample;
+    upsample->upmethod = h2v2_merged_upsample;
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+  } else {
+    upsample->pub.upsample = merged_1v_upsample;
+    upsample->upmethod = h2v1_merged_upsample;
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  build_ycc_rgb_table(cinfo); /* must follow the cinfo->upsample assignment it reads */
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/JPEG/jdphuff.cpp b/JPEG/jdphuff.cpp
new file mode 100644
index 0000000..2267809
--- /dev/null
+++ b/JPEG/jdphuff.cpp
@@ -0,0 +1,668 @@
+/*
+ * jdphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines for progressive JPEG.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdhuff.c */
+
+
+#ifdef D_PROGRESSIVE_SUPPORTED
+
+/*
+ * Expanded entropy decoder object for progressive Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* blocks left to skip in current EOB run */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+} savable_state;
+
+/* ASSIGN_STATE(dest,src) copies a savable_state.  The member-wise form works
+ * around compilers with missing or broken structure assignment; it is only
+ * spelled out for MAX_COMPS_IN_SCAN == 4, so fix it if you change that.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif /* MAX_COMPS_IN_SCAN == 4 */
+#endif /* NO_STRUCT_ASSIGN */
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields; cinfo->entropy is cast to this struct */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; /* by table number; NULL at init */
+
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+} phuff_entropy_decoder;
+
+typedef phuff_entropy_decoder * phuff_entropy_ptr;
+
+/* Forward declarations: one MCU decoder per scan type (DC/AC, first/refine) */
+METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+
+
+/*
+ * Start a progressive Huffman scan: validate Ss/Se/Ah/Al, select decoder.
+ */
+
+METHODDEF(void)
+start_pass_phuff_decoder (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band, bad;
+  int ci, coefi, tbl;
+  int *coef_bit_ptr;
+  jpeg_component_info * compptr;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* Validate scan parameters; problems are collected in "bad" */
+  bad = FALSE;
+  if (is_DC_band) {
+    if (cinfo->Se != 0)
+      bad = TRUE;
+  } else {
+    /* need not check Ss/Se < 0 since they came from unsigned bytes */
+    if (cinfo->Ss > cinfo->Se || cinfo->Se >= DCTSIZE2)
+      bad = TRUE;
+    /* AC scans may have only one component */
+    if (cinfo->comps_in_scan != 1)
+      bad = TRUE;
+  }
+  if (cinfo->Ah != 0) {
+    /* Successive approximation refinement scan: must have Al = Ah-1. */
+    if (cinfo->Al != cinfo->Ah-1)
+      bad = TRUE;
+  }
+  if (cinfo->Al > 13)		/* need not check for < 0 */
+    bad = TRUE;
+  /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+   * but the spec doesn't say so, and we try to be liberal about what we
+   * accept.  Note: large Al values could result in out-of-range DC
+   * coefficients during early scans, leading to bizarre displays due to
+   * overflows in the IDCT math.  But we won't crash.
+   */
+  if (bad)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	     cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+  /* Update progression status, and verify that scan order is legal.
+   * Note that inter-scan inconsistencies are treated as warnings
+   * not fatal errors ... not clear if this is right way to behave.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    int cindex = cinfo->cur_comp_info[ci]->component_index;
+    coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+    if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+      WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+    for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+      int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+      if (cinfo->Ah != expected)
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+      coef_bit_ptr[coefi] = cinfo->Al;	/* a later scan's Ah is checked against this */
+    }
+  }
+
+  /* Select MCU decoding routine (Ah == 0 means first pass, else refinement) */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_first;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_refine;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_refine;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Make sure requested tables are present, and compute derived tables.
+     * We may build same derived table more than once, but it's not expensive.
+     */
+    if (is_DC_band) {
+      if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	tbl = compptr->dc_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				& entropy->derived_tbls[tbl]);
+      }
+    } else {
+      tbl = compptr->ac_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+			      & entropy->derived_tbls[tbl]);
+      /* remember the single active table */
+      entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize private state variables */
+  entropy->saved.EOBRUN = 0;
+
+  /* Initialize restart counter (MCUs until the next restart marker) */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Figure F.12: extend sign bit (x < 2**(s-1) means negative: x - 2**s + 1).
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - (int) ((1U<<(s)) - 1) : (x))
+
+#else
+
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+static const int extend_offset[16] = /* entry n is -(2**n - 1); no (-1)<<n */
+  { 0, -0x0001, -0x0003, -0x0007, -0x000F,
+    -0x001F, -0x003F, -0x007F, -0x00FF,
+    -0x01FF, -0x03FF, -0x07FF, -0x0FFF,
+    -0x1FFF, -0x3FFF, -0x7FFF };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr dec = (phuff_entropy_ptr) cinfo->entropy;
+  savable_state *perm = &dec->saved;
+  int comp = cinfo->comps_in_scan;
+
+  /* Discard whatever is left in the bit buffer, crediting its whole bytes */
+  /* to the marker reader's count of discarded bytes. */
+  cinfo->marker->discarded_bytes += dec->bitstate.bits_left / 8;
+  dec->bitstate.bits_left = 0;
+
+  /* Consume the RSTn marker; a FALSE result means we must suspend */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Start the new interval with zero DC predictions and no pending EOB run */
+  while (--comp >= 0)
+    perm->last_dc_val[comp] = 0;
+  perm->EOBRUN = 0;
+
+  /* The restart countdown begins again at the full interval */
+  dec->restarts_to_go = cinfo->restart_interval;
+
+  /* The out-of-data flag is cleared only when no marker is pending.  If
+   * read_restart_marker left us smack up against a marker, the next data
+   * segment will be treated as empty, so the flag stays set to avoid
+   * producing bogus output pixels.
+   */
+  if (cinfo->unread_marker == 0)
+    dec->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;		/* left shift applied to each output value */
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;		/* working copy, committed only on success */
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0).
+       * Shift as unsigned: s may be negative, and << on that is undefined. */
+      (*block)[0] = (JCOEF) ((unsigned int) s << Al);
+    }
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;		/* last coefficient index of this band */
+  int Al = cinfo->Al;		/* left shift applied to each output value */
+  register int s, k, r;
+  unsigned int EOBRUN;		/* local copy of entropy->saved.EOBRUN */
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN > 0)		/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;		/* high nibble: run length */
+	s &= 15;		/* low nibble: size of the coefficient in bits */
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale (unsigned shift: s may be < 0), store in natural order */
+	  (*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned int) s << Al);
+	} else {
+	  if (r == 15) {	/* ZRL */
+	    k += 15;		/* skip 15 zeroes in band */
+	  } else {		/* EOBr, run length is 2^r + appended bits */
+	    EOBRUN = 1 << r;
+	    if (r) {		/* EOBr, r > 0 */
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    EOBRUN--;		/* this band is processed at this moment */
+	    break;		/* force end-of-band */
+	  }
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr dec = (phuff_entropy_ptr) cinfo->entropy;
+  const int bit = 1 << cinfo->Al;	/* mask of the bit position being coded */
+  JBLOCKROW *cursor = MCU_data;
+  int todo;
+  BITREAD_STATE_VARS;
+
+  /* A restart marker that is due gets processed first; it may suspend us */
+  if (cinfo->restart_interval && dec->restarts_to_go == 0) {
+    if (! process_restart(cinfo))
+      return FALSE;
+  }
+
+  /* insufficient_data is deliberately not tested here: it is not worth
+   * the cycles, since reading zero bits leaves the coefficient data
+   * unchanged anyway.
+   */
+
+  /* Pull the permanent bit-reader state into locals */
+  BITREAD_LOAD_STATE(cinfo,dec->bitstate);
+
+  /* One refinement bit per block of the MCU; the loop walks MCU_data
+   * with a pointer rather than an index.
+   */
+  for (todo = cinfo->blocks_in_MCU; todo > 0; todo--, cursor++) {
+    /* The bit is simply the next bit of the two's-complement DC value.
+     * Because it is OR-ed in, repeating the update after a suspension
+     * is safe.
+     */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      (**cursor)[0] |= bit;
+  }
+
+  /* MCU finished: commit the bit-reader state */
+  BITREAD_SAVE_STATE(cinfo,dec->bitstate);
+  /* Count this MCU against the restart interval (no-op without restarts) */
+  dec->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;
+  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int m1 = -p1;			/* -1 in that position; avoids (-1) << Al */
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;		/* number of entries used in newnz_pos[] */
+  int newnz_pos[DCTSIZE2];	/* positions made nonzero during this call */
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + jpeg_natural_order[k];
+	  if (*thiscoef != 0) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = jpeg_natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+      }
+    }
+
+    if (EOBRUN > 0) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      for (; k <= Se; k++) {
+	thiscoef = *block + jpeg_natural_order[k];
+	if (*thiscoef != 0) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+      }
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz > 0)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;	/* suspended: permanent state was not updated */
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_decoder (j_decompress_ptr cinfo)
+{
+  j_common_ptr owner = (j_common_ptr) cinfo;
+  phuff_entropy_ptr dec;
+  int comp, slot;
+
+  /* Allocate the decoder object and hook it into the decompressor */
+  dec = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) (owner, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) dec;
+  dec->pub.start_pass = start_pass_phuff_decoder;
+
+  /* No derived table exists yet */
+  for (slot = NUM_HUFF_TBLS - 1; slot >= 0; slot--)
+    dec->derived_tbls[slot] = NULL;
+
+  /* Progression status table: DCTSIZE2 entries per component, all -1
+   * to begin with (start_pass_phuff_decoder reads a negative entry as 0). */
+  cinfo->coef_bits = (int (*)[DCTSIZE2])
+    (*cinfo->mem->alloc_small) (owner, JPOOL_IMAGE,
+				cinfo->num_components*DCTSIZE2*SIZEOF(int));
+  for (comp = 0; comp < cinfo->num_components; comp++)
+    for (slot = 0; slot < DCTSIZE2; slot++)
+      cinfo->coef_bits[comp][slot] = -1;
+}
+
+#endif /* D_PROGRESSIVE_SUPPORTED */
diff --git a/JPEG/jdpostct.cpp b/JPEG/jdpostct.cpp
new file mode 100644
index 0000000..571563d
--- /dev/null
+++ b/JPEG/jdpostct.cpp
@@ -0,0 +1,290 @@
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields; cinfo->post is cast to this struct */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual array */
+  JDIMENSION strip_height;	/* buffer size in rows (set to max_v_samp_factor) */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations of the post_process_data methods chosen in start_pass_dpost */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr ctl = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (! cinfo->quantize_colors) {
+      /* No color quantization: this module has no work to do, so the
+       * upsampler's own routine is installed as the processing method.
+       */
+      ctl->pub.post_process_data = cinfo->upsample->upsample;
+      break;
+    }
+    /* Single-pass processing with color quantization. */
+    ctl->pub.post_process_data = post_process_1pass;
+    /* If jinit_d_post_controller made no strip buffer (buffered-image output
+     * ahead of a 2-pass color quantization), use the first strip of the
+     * virtual array as workspace.
+     */
+    if (ctl->buffer == NULL)
+      ctl->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, ctl->whole_image,
+	 (JDIMENSION) 0, ctl->strip_height, TRUE);
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:	/* first pass of 2-pass quantization */
+  case JBUF_CRANK_DEST:		/* second pass of 2-pass quantization */
+    /* Both passes require the full-image virtual array */
+    if (ctl->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    if (pass_mode == JBUF_SAVE_AND_PASS)
+      ctl->pub.post_process_data = post_process_prepass;
+    else
+      ctl->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+
+  /* Every pass starts with both row counters at zero */
+  ctl->starting_row = 0;
+  ctl->next_row = 0;
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr ctl = (my_post_ptr) cinfo->post;
+  JDIMENSION room = out_rows_avail - *out_row_ctr;
+  JDIMENSION limit = (room > ctl->strip_height) ? ctl->strip_height : room;
+  JDIMENSION produced = 0;
+
+  /* Upsample into the strip buffer, capped at both the strip height and the
+   * space left in the caller's output area.  Detecting the bottom of the
+   * image is left to the upsampler.
+   */
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		ctl->buffer, &produced, limit);
+  /* Quantize whatever was produced into the output rows. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		ctl->buffer, output_buf + *out_row_ctr, (int) produced);
+  *out_row_ctr += produced;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)
+{
+  my_post_ptr ctl = (my_post_ptr) cinfo->post;
+  JDIMENSION first_new, fresh;
+
+  /* At the top of a strip, fetch that strip of the virtual array */
+  if (ctl->next_row == 0)
+    ctl->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, ctl->whole_image,
+	 ctl->starting_row, ctl->strip_height, TRUE);
+
+  /* Upsample more rows (next_row is the row counter, strip_height the cap) */
+  first_new = ctl->next_row;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		ctl->buffer, &ctl->next_row, ctl->strip_height);
+
+  /* Let the quantizer scan just the new rows.  No data is emitted (NULL
+   * destination), but out_row_ctr advances so the outer loop can tell
+   * when we're done.
+   */
+  if (ctl->next_row > first_new) {
+    fresh = ctl->next_row - first_new;
+    (*cinfo->cquantize->color_quantize) (cinfo, ctl->buffer + first_new,
+					 (JSAMPARRAY) NULL, (int) fresh);
+    *out_row_ctr += fresh;
+  }
+  /* A completely filled strip moves us on to the next strip */
+  if (ctl->next_row >= ctl->strip_height) {
+    ctl->starting_row += ctl->strip_height;
+    ctl->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr ctl = (my_post_ptr) cinfo->post;
+  JDIMENSION count, bound;
+
+  /* At the top of a strip, fetch that strip of the virtual array */
+  if (ctl->next_row == 0)
+    ctl->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, ctl->whole_image,
+	 ctl->starting_row, ctl->strip_height, FALSE);
+
+  /* Rows to emit: what is left in the strip, clamped to the room in the
+   * output area and to output_height - starting_row.  The bottom of the
+   * image has to be checked here; we can't depend on the upsampler. */
+  count = ctl->strip_height - ctl->next_row;
+  bound = out_rows_avail - *out_row_ctr;
+  if (bound < count)
+    count = bound;
+  bound = cinfo->output_height - ctl->starting_row;
+  if (bound < count)
+    count = bound;
+
+  /* Quantize the rows out of the strip and into the caller's output area */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		ctl->buffer + ctl->next_row, output_buf + *out_row_ctr,
+		(int) count);
+  *out_row_ctr += count;
+
+  /* Step through the strip; once it is used up, move on to the next one */
+  ctl->next_row += count;
+  if (ctl->next_row >= ctl->strip_height) {
+    ctl->starting_row += ctl->strip_height;
+    ctl->next_row = 0;
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  j_common_ptr owner = (j_common_ptr) cinfo;
+  JDIMENSION row_samples;
+  my_post_ptr ctl = (my_post_ptr)
+    (*cinfo->mem->alloc_small) (owner, JPOOL_IMAGE, SIZEOF(my_post_controller));
+
+  cinfo->post = (struct jpeg_d_post_controller *) ctl;
+  ctl->pub.start_pass = start_pass_dpost;
+  ctl->whole_image = NULL;	/* flag for no virtual arrays */
+  ctl->buffer = NULL;		/* flag for no strip buffer */
+
+  /* The quantization buffer is created only when colors are quantized */
+  if (! cinfo->quantize_colors)
+    return;
+
+  /* Strips are max_v_samp_factor rows tall, which is typically an efficient
+   * number of rows for upsampling to return.  (With output rescaling, we
+   * might want to be smarter?) */
+  ctl->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+  row_samples = cinfo->output_width * cinfo->out_color_components;
+
+  if (! need_full_buffer) {
+    /* One-pass color quantization: just make a strip buffer. */
+    ctl->buffer = (*cinfo->mem->alloc_sarray)
+	(owner, JPOOL_IMAGE, row_samples, ctl->strip_height);
+    return;
+  }
+
+  /* Two-pass color quantization: full-image storage, with the number of
+   * rows rounded up to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+  ctl->whole_image = (*cinfo->mem->request_virt_sarray)
+	(owner, JPOOL_IMAGE, FALSE, row_samples,
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) ctl->strip_height),
+	 ctl->strip_height);
+#else
+  ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+}
diff --git a/JPEG/jdsample.cpp b/JPEG/jdsample.cpp
new file mode 100644
index 0000000..80ffefb
--- /dev/null
+++ b/JPEG/jdsample.cpp
@@ -0,0 +1,478 @@
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to a routine that upsamples one row group of a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+/* Private subobject: module state, with the public struct as first member */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * i.e. do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers, filled in by jinit_upsampler */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_upsample need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
+
+  /* Start the row counter at the image height; it detects the bottom edge */
+  state->rows_to_go = cinfo->output_height;
+  /* A "full" next_row_out makes sep_upsample refill color_buf on first use */
+  state->next_row_out = cinfo->max_v_samp_factor;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion a row at a time.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
+  jpeg_component_info * comp;
+  JDIMENSION emit, room;
+  int idx;
+
+  /* Refill the conversion buffer once all of its rows have been emitted */
+  if (state->next_row_out >= cinfo->max_v_samp_factor) {
+    comp = cinfo->comp_info;
+    for (idx = 0; idx < cinfo->num_components; idx++, comp++) {
+      /* Run this component's upsampler.  It gets the ADDRESS of
+       * color_buf[idx] so that fullsize_upsample can redirect the entry.
+       */
+      (*state->methods[idx]) (cinfo, comp,
+	input_buf[idx] + (*in_row_group_ctr * state->rowgroup_height[idx]),
+	&state->color_buf[idx]);
+    }
+    state->next_row_out = 0;
+  }
+
+  /* Work out how many rows to color-convert and emit on this call. */
+
+  /* Start from the rows still waiting in the buffer ... */
+  emit = (JDIMENSION) (cinfo->max_v_samp_factor - state->next_row_out);
+  /* ... cap that at the rows left in the image, which matters when the
+   * image height is not a multiple of max_v_samp_factor ...
+   */
+  if (state->rows_to_go < emit)
+    emit = state->rows_to_go;
+  /* ... and at the room left in the client's output array. */
+  room = out_rows_avail - *out_row_ctr;
+  if (room < emit)
+    emit = room;
+
+  (*cinfo->cconvert->color_convert) (cinfo, state->color_buf,
+				     (JDIMENSION) state->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) emit);
+
+  /* Update the bookkeeping */
+  *out_row_ctr += emit;
+  state->rows_to_go -= emit;
+  state->next_row_out += emit;
+  /* The input row group counts as consumed only once the buffer is drained */
+  if (state->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = input_data;	/* alias the input rows; no samples copied */
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components.
+ * These components will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = NULL;	/* safety check: color conversion won't use it */
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY dst_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  register int rep;
+  JSAMPROW dst_end;
+  int h_factor, v_factor;
+  int src_row, dst_row;
+
+  /* Fetch the replication factors saved for this component */
+  v_factor = state->v_expand[compptr->component_index];
+  h_factor = state->h_expand[compptr->component_index];
+
+  /* Every input row produces v_factor consecutive output rows */
+  for (src_row = 0, dst_row = 0; dst_row < cinfo->max_v_samp_factor;
+       src_row++, dst_row += v_factor) {
+    /* Build the first of them by repeating each sample h_factor times */
+    src = input_data[src_row];
+    dst = dst_rows[dst_row];
+    dst_end = dst + cinfo->output_width;
+    while (dst < dst_end) {
+      sample = *src++;	/* raw copy, GETJSAMPLE() not needed */
+      for (rep = 0; rep < h_factor; rep++) {
+	*dst++ = sample;
+      }
+    }
+    /* Then clone that row into the remaining v_factor-1 rows, if any */
+    if (v_factor > 1) {
+      jcopy_sample_rows(dst_rows, dst_row, dst_rows, dst_row + 1,
+			v_factor - 1, cinfo->output_width);
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY dst_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  JSAMPROW dst_end;
+  int row;
+
+  for (row = 0; row < cinfo->max_v_samp_factor; row++) {
+    src = input_data[row];
+    dst = dst_rows[row];
+    /* Write each input sample twice until the output row is filled */
+    for (dst_end = dst + cinfo->output_width; dst < dst_end; dst += 2) {
+      sample = *src++;	/* raw copy, GETJSAMPLE() not needed */
+      dst[0] = sample;
+      dst[1] = sample;
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY dst_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  JSAMPROW dst_end;
+  int src_row, dst_row;
+
+  /* Each input row yields two output rows */
+  for (src_row = 0, dst_row = 0; dst_row < cinfo->max_v_samp_factor;
+       src_row++, dst_row += 2) {
+    src = input_data[src_row];
+    dst = dst_rows[dst_row];
+    /* Horizontal doubling: write every input sample twice */
+    for (dst_end = dst + cinfo->output_width; dst < dst_end; dst += 2) {
+      sample = *src++;	/* raw copy, GETJSAMPLE() not needed */
+      dst[0] = sample;
+      dst[1] = sample;
+    }
+    /* Vertical doubling: replicate the finished row into the next one */
+    jcopy_sample_rows(dst_rows, dst_row, dst_rows, dst_row + 1,
+		      1, cinfo->output_width);
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+ *
+ * The upsampling algorithm is linear interpolation between pixel centers,
+ * also known as a "triangle filter".  This is a good compromise between
+ * speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+ * of the way between input pixel centers.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register int invalue;		/* current sample; 3x the sample in the main loop */
+  register JDIMENSION colctr;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    /* Special case for first column: it has no left neighbor */
+    invalue = GETJSAMPLE(*inptr++);
+    *outptr++ = (JSAMPLE) invalue;
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+    /* Interior columns; jinit_upsampler selects this routine only if width > 2 */
+    for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+      /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
+      invalue = GETJSAMPLE(*inptr++) * 3;
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); /* left output, bias 1 */
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(*inptr) + 2) >> 2); /* right output, bias 2 */
+    }
+
+    /* Special case for last column: it has no right neighbor */
+    invalue = GETJSAMPLE(*inptr);
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+    *outptr++ = (JSAMPLE) invalue;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * Again a triangle filter; see comments for h2v1 case, above.
+ *
+ * It is OK for us to reference the adjacent input rows because we demanded
+ * context from the main buffer controller (see initialization code).
+ */
+
+METHODDEF(void)
+h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr0, inptr1, outptr;
+#if BITS_IN_JSAMPLE == 8
+  register int thiscolsum, lastcolsum, nextcolsum;
+#else
+  register INT32 thiscolsum, lastcolsum, nextcolsum;
+#endif
+  register JDIMENSION colctr;
+  int inrow, outrow, v;		/* v selects the upper (0) or lower (1) output row */
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    for (v = 0; v < 2; v++) {
+      /* inptr0 points to nearest input row, inptr1 points to next nearest */
+      inptr0 = input_data[inrow];
+      if (v == 0)		/* next nearest is row above */
+	inptr1 = input_data[inrow-1];	/* index -1 relies on context rows */
+      else			/* next nearest is row below */
+	inptr1 = input_data[inrow+1];	/* may be past the group: context row */
+      outptr = output_data[outrow++];
+      /* Each column sum is 3 * nearer row + 1 * further row for one column */
+      /* Special case for first column */
+      thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+      lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+
+      for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+	/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+	/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+	nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+	lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+      }
+
+      /* Special case for last column */
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 7) >> 4);
+    }
+    inrow++;			/* both output rows for this input row are done */
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean need_buffer, do_fancy;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+  /* Allocate the private state from the JPOOL_IMAGE pool, install methods */
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
+   * so don't ask for it.
+   */
+  do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    need_buffer = TRUE;		/* cleared below for the no-op and fullsize methods */
+    if (! compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group == v_out_group) {
+      /* Special cases for 2h1v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) /* fancy code needs > 2 columns */
+	upsample->methods[ci] = h2v1_fancy_upsample;
+      else
+	upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special cases for 2h2v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) { /* fancy code needs > 2 columns */
+	upsample->methods[ci] = h2v2_fancy_upsample;
+	upsample->pub.need_context_rows = TRUE; /* it reads the rows above and below */
+      } else
+	upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    if (need_buffer) {		/* row width is rounded up to a multiple of max_h_samp_factor */
+      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) jround_up((long) cinfo->output_width,
+				(long) cinfo->max_h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/JPEG/jdtrans.cpp b/JPEG/jdtrans.cpp
new file mode 100644
index 0000000..6c0ab71
--- /dev/null
+++ b/JPEG/jdtrans.cpp
@@ -0,0 +1,143 @@
+/*
+ * jdtrans.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) { /* also true on a call after suspension */
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+	return NULL;		/* suspended: state stays DSTATE_RDCOEFS */
+      if (retcode == JPEG_REACHED_EOI)
+	break;			/* the whole file has been absorbed */
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	}
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays; /* the virtual coefficient arrays */
+  }
+  /* Oops, improper usage: reached from any other state */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;			/* keep compiler happy */
+}
+
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ */
+
+LOCAL(void)
+transdecode_master_selection (j_decompress_ptr cinfo)
+{
+  int scan_estimate;
+
+  /* Transcoding is handled as a buffered-image operation. */
+  cinfo->buffered_image = TRUE;
+
+  /* Pick the entropy decoder; arithmetic coding is not implemented. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else if (! cinfo->progressive_mode) {
+    jinit_huff_decoder(cinfo);
+  } else {
+#ifdef D_PROGRESSIVE_SUPPORTED
+    jinit_phuff_decoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
+
+  /* The coefficient controller always keeps a full-image buffer here. */
+  jinit_d_coef_controller(cinfo, TRUE);
+
+  /* All requests are in; have the memory manager allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Prepare the input side of the decompressor to consume the first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* The rest sets up progress monitoring, if a monitor is installed. */
+  if (cinfo->progress == NULL)
+    return;
+
+  /* Guess the number of scans so that pass_limit can be set. */
+  if (cinfo->progressive_mode)
+    /* Arbitrary estimate: 2 interleaved DC scans + 3 AC scans/component. */
+    scan_estimate = 2 + 3 * cinfo->num_components;
+  else if (cinfo->inputctl->has_multiple_scans)
+    /* Nonprogressive multiscan file: estimate 1 scan per component. */
+    scan_estimate = cinfo->num_components;
+  else
+    scan_estimate = 1;
+  cinfo->progress->pass_counter = 0L;
+  cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * scan_estimate;
+  cinfo->progress->completed_passes = 0;
+  cinfo->progress->total_passes = 1;
+}
diff --git a/JPEG/jerror.cpp b/JPEG/jerror.cpp
new file mode 100644
index 0000000..3da7be8
--- /dev/null
+++ b/JPEG/jerror.cpp
@@ -0,0 +1,252 @@
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ * The message table is made an external symbol just in case any applications
+ * want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,
+
+const char * const jpeg_std_message_table[] = {	/* indexed by message code */
+#include "jerror.h"
+  NULL			/* extra final entry after the last message string */
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(void)
+error_exit (j_common_ptr cinfo)
+{
+  /* Always display the message (through the replaceable output_message) */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);		/* terminate here: this handler must not return */
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char text[JMSG_LENGTH_MAX];
+
+  /* Let the error manager's formatter build the message text */
+  (*cinfo->err->format_message) (cinfo, text);
+
+#ifndef USE_WINDOWS_MESSAGEBOX
+  /* Default: write the text to stderr, followed by a newline */
+  fprintf(stderr, "%s\n", text);
+#else
+  /* Windows option: show the text in an error dialog box instead */
+  MessageBox(GetActiveWindow(), text, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * mgr = cinfo->err;
+  int show;
+
+  if (msg_level >= 0) {
+    /* Trace message: shown when trace_level is at least msg_level. */
+    show = (mgr->trace_level >= msg_level);
+  } else {
+    /* Warning: corrupt files may produce many, so show only the first
+     * one unless trace_level >= 3. */
+    show = (mgr->num_warnings == 0 || mgr->trace_level >= 3);
+  }
+  if (show)
+    (*mgr->output_message) (cinfo);
+  /* Warnings are always counted, whether or not they were displayed. */
+  if (msg_level < 0)
+    mgr->num_warnings++;
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which must hold at least JMSG_LENGTH_MAX
+ * characters; longer text is truncated to fit.  No '\n' character is added.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table (library, else add-on range) */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number: report it using table entry 0 */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* A %s as the first conversion in the text marks a string parameter */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format into the caller's buffer, never writing past JMSG_LENGTH_MAX */
+  if (isstring)
+    snprintf(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
+  else
+    snprintf(buffer, JMSG_LENGTH_MAX, msgtext,
+	     err->msg_parm.i[0], err->msg_parm.i[1],
+	     err->msg_parm.i[2], err->msg_parm.i[3],
+	     err->msg_parm.i[4], err->msg_parm.i[5],
+	     err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  struct jpeg_error_mgr * mgr = cinfo->err;	/* trace_level stays as set */
+  mgr->msg_code = 0;		/* zero can serve as a "no error" flag */
+  mgr->num_warnings = 0;
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  /* Standard message table; its last valid code is JMSG_LASTMSGCODE - 1 */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+  /* No application add-on table is installed by default */
+  err->addon_message_table = NULL;
+  err->first_addon_message = 0;	/* for safety */
+  err->last_addon_message = 0;
+  /* Initial state: tracing off, no warnings emitted yet */
+  err->trace_level = 0;
+  err->num_warnings = 0;
+  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+  /* Default method implementations; the application may override them */
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  return err;
+}
diff --git a/JPEG/jerror.h b/JPEG/jerror.h
new file mode 100644
index 0000000..fc2fffe
--- /dev/null
+++ b/JPEG/jerror.h
@@ -0,0 +1,291 @@
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros is defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * This file is an X-macro list.  Include it with JMESSAGE undefined to get
+ * the enum list of message codes; include it again with a suitable JMESSAGE
+ * definition to build a message string table (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through (JERROR_H not yet defined): define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions are no-ops: each JMESSAGE entry expands to nothing */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,	/* keep only the enumerator name */
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry (code 0)! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+JMESSAGE(JERR_ARITH_NOTIMPL,
+	 "Sorry, there are legal restrictions on arithmetic coding")
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_SOS_NO_SOF, "Invalid JPEG file structure: SOS before SOF")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE		/* sentinel following the last real message code */
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* First parameter: either type of cinfo pointer (cast to j_common_ptr) */
+
+/* Fatal errors: store code and int parameters, then call err->error_exit */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', /* force NUL */ \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)	/* wrap as one statement */
+
+/* Nonfatal errors (data is probably corrupt): emit_message at level -1 */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Trace messages at level lvl; TRACEMS3 and up expand to statements */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', /* force NUL */ \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/JPEG/jfdctflt.cpp b/JPEG/jfdctflt.cpp
new file mode 100644
index 0000000..79d7a00
--- /dev/null
+++ b/JPEG/jfdctflt.cpp
@@ -0,0 +1,168 @@
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT, in place, on one 8x8 block of samples (data[]).
+ */
+
+GLOBAL(void)
+jpeg_fdct_float (FAST_FLOAT * data)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  int ctr;
+
+  /* Pass 1: 1-D DCT on each row of 8 adjacent elements, done in place. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];	/* sum/difference of mirrored pairs */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part (cK in the comments below denotes cos(K*pi/16)) */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: same 1-D DCT on each column; elements are DCTSIZE apart. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/JPEG/jfdctfst.cpp b/JPEG/jfdctfst.cpp
new file mode 100644
index 0000000..ccb378a
--- /dev/null
+++ b/JPEG/jfdctfst.cpp
@@ -0,0 +1,224 @@
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8	/* fractional bits in the multiplicative constants */
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else /* other CONST_BITS: no precalculated values, rely on FIX() */
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif /* CONST_BITS == 8 */
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)	/* plain shift, rounding add omitted */
+#endif /* !USE_ACCURATE_ROUNDING */
+
+
+/* Scale a DCTELEM variable by an INT32 fixed-point constant, descaling the
+ * product by CONST_BITS at once so that the result is again a DCTELEM.
+ */
+
+#define MULTIPLY(var,cnst)  ((DCTELEM) DESCALE((var) * (cnst), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT, in place, on one 8x8 block of samples (data[]).
+ */
+
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: 1-D DCT on each row; results are not upscaled before pass 2. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];	/* sum/difference of mirrored pairs */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part (cK in the comments below denotes cos(K*pi/16)) */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: same 1-D DCT on each column; elements are DCTSIZE apart. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/JPEG/jfdctint.cpp b/JPEG/jfdctint.cpp
new file mode 100644
index 0000000..0a78b64
--- /dev/null
+++ b/JPEG/jfdctint.cpp
@@ -0,0 +1,283 @@
+/*
+ * jfdctint.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is INT32 anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13		/* fractional bits kept in the FIX_* constants */
+#define PASS1_BITS  2		/* extra scaling applied to pass 1 outputs */
+#else				/* BITS_IN_JSAMPLE != 8 */
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif				/* BITS_IN_JSAMPLE == 8 */
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13		/* pre-computed values, valid only for 13 bits */
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else				/* CONST_BITS != 13: let FIX() compute them */
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif				/* CONST_BITS == 13 */
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else				/* BITS_IN_JSAMPLE != 8: plain multiply */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif				/* BITS_IN_JSAMPLE == 8 */
+
+
+/*
+ * Perform the forward DCT in place on one 8x8 block (data[0..63], row-major).
+ */
+
+GLOBAL(void)
+jpeg_fdct_islow (DCTELEM * data)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+  /* On return data[] holds the coefficients, still scaled up by 8. */
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    /* tmp0..tmp3 are the mirrored-pair sums, tmp4..tmp7 the differences. */
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    /* Multiply rather than "<<": left-shifting a negative sum is undefined. */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) * ((INT32) 1 << PASS1_BITS));
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) * ((INT32) 1 << PASS1_BITS));
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+				   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+				   CONST_BITS-PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    /* Products still carry CONST_BITS; keep PASS1_BITS of them for pass 2. */
+    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+    
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    /* Same butterflies as pass 1, stepping down a column (stride DCTSIZE). */
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    /* Outputs 0 and 4 need no multiply; just drop the PASS1_BITS scaling. */
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+					   CONST_BITS+PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    /* Remove both the constant scaling and the pass-1 scaling here. */
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
+					   CONST_BITS+PASS1_BITS);
+    
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/JPEG/jidctflt.cpp b/JPEG/jidctflt.cpp
new file mode 100644
index 0000000..0188ce3
--- /dev/null
+++ b/JPEG/jidctflt.cpp
@@ -0,0 +1,242 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; coef is cast to FAST_FLOAT first, so the result is a float.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/* Dequantize coef_block with compptr->dct_table, apply the 2-D inverse DCT,
+ * and store 8 range-limited samples at output_buf[row] + output_col, row 0..7.
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;		/* walks the columns of coef_block */
+  FLOAT_MULT_TYPE * quantptr;	/* walks compptr->dct_table in step with inptr */
+  FAST_FLOAT * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+  SHIFT_TEMPS
+  /* cinfo is used only to fetch the range-limit table (IDCT_range_limit). */
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      /* Every workspace entry of this column gets the same DC value. */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+    /* Paired outputs share the even term and take the odd term with +/-. */
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*4] = tmp3 + tmp4;
+    wsptr[DCTSIZE*3] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3. */
+  /* Each workspace row of 8 floats yields one output row of 8 samples. */
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+    
+    /* Even part */
+
+    tmp10 = wsptr[0] + wsptr[4];
+    tmp11 = wsptr[0] - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+    /* NOTE(review): the (INT32) cast truncates; confirm sums fit in INT32. */
+    outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/JPEG/jidctfst.cpp b/JPEG/jidctfst.cpp
new file mode 100644
index 0000000..dba4216
--- /dev/null
+++ b/JPEG/jidctfst.cpp
@@ -0,0 +1,368 @@
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8		/* fractional bits in the FIX_* constants */
+#define PASS1_BITS  2
+#else				/* BITS_IN_JSAMPLE != 8 */
+#define CONST_BITS  8
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif				/* BITS_IN_JSAMPLE == 8 */
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8		/* pre-computed values, valid only for 8 bits */
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else				/* CONST_BITS != 8: let FIX() compute them */
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif				/* CONST_BITS == 8 */
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE			/* drop any earlier definition */
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)	/* plain shift, no rounding add */
+#endif				/* !USE_ACCURATE_ROUNDING */
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, then immediately descale
+ * the product by CONST_BITS and cast it, to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else				/* BITS_IN_JSAMPLE != 8 */
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif				/* BITS_IN_JSAMPLE == 8 */
+
+
+/* Like DESCALE, but applies to a DCTELEM and produces an int.
+ * We assume that int right shift is unsigned if INT32 right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED	/* must fill in the sign bits by hand */
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;	/* scratch for IRIGHT_SHIFT */
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else				/* BITS_IN_JSAMPLE != 8 */
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif				/* BITS_IN_JSAMPLE == 8 */
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else				/* !RIGHT_SHIFT_IS_UNSIGNED */
+#define ISHIFT_TEMPS		/* no scratch variable needed */
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif				/* RIGHT_SHIFT_IS_UNSIGNED */
+
+#ifdef USE_ACCURATE_ROUNDING	/* add half of the divisor before shifting */
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
+#else				/* !USE_ACCURATE_ROUNDING: shift only */
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
+#endif				/* USE_ACCURATE_ROUNDING */
+
+
+/* Dequantize coef_block with compptr->dct_table, apply the 2-D inverse DCT,
+ * and store 8 range-limited samples at output_buf[row] + output_col, row 0..7.
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;		/* walks the columns of coef_block */
+  IFAST_MULT_TYPE * quantptr;	/* walks compptr->dct_table in step with inptr */
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IDESCALE */
+  /* cinfo is used only to fetch the range-limit table (IDCT_range_limit). */
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      /* Every workspace entry of this column gets the same DC value. */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+    /* Paired outputs share the even term and take the odd term with +/-. */
+    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
+    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+    wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+    wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+  /* Each workspace row of 8 ints yields one output row of 8 samples. */
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      /* The whole output row is this one range-limited DC sample. */
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif /* NO_ZERO_ROW_TEST */
+    
+    /* Even part */
+
+    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+
+    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
+	    - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: descale by 2**(PASS1_BITS+3) and range-limit */
+
+    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/JPEG/jidctint.cpp b/JPEG/jidctint.cpp
new file mode 100644
index 0000000..a72b320
--- /dev/null
+++ b/JPEG/jidctint.cpp
@@ -0,0 +1,389 @@
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.  For any other value
+ * the non-C text inside the #if below deliberately breaks the build.
+ */
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The rationale for this scaling is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13		/* fraction bits kept in the FIX_* constants */
+#define PASS1_BITS  2		/* extra scaling applied to pass-1 outputs */
+#else
+#define CONST_BITS  13		/* same constant precision as the 8-bit case */
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants, each
+ * being the named value times 2**13, rounded to the nearest integer.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ * The product is not descaled here; callers apply DESCALE afterwards.
+ */
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ * Only the coefficient is cast; quantval is used with its own type.
+ */
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/* Perform dequantization and inverse DCT on one block of coefficients:
+ * reads coef_block and compptr->dct_table, and stores 8 samples into each
+ * of output_buf[0..7], starting at column output_col.
+ */
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;		/* current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* multipliers for the current column */
+  int * wsptr;			/* workspace column (pass 1) or row (pass 2) */
+  JSAMPROW outptr;		/* current output row */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero: all 8 outputs equal the scaled DC term */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      /* NOTE(review): if the product can be negative, "<<" is undefined in ISO C -- confirm */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    /* Shift the unmultiplied terms up to the scale of the MULTIPLY results. */
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    /* The shift count CONST_BITS-PASS1_BITS leaves 2**PASS1_BITS scaling. */
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero: the whole row gets the descaled DC term's sample */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    
+    tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
+    tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    /* Each result is descaled, masked with RANGE_MASK, then looked up. */
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/JPEG/jidctred.cpp b/JPEG/jidctred.cpp
new file mode 100644
index 0000000..421f3c7
--- /dev/null
+++ b/JPEG/jidctred.cpp
@@ -0,0 +1,398 @@
+/*
+ * jidctred.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains inverse-DCT routines that produce reduced-size output:
+ * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+ *
+ * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
+ * algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
+ * with an 8-to-4 step that produces the four averages of two adjacent outputs
+ * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
+ * These steps were derived by computing the corresponding values at the end
+ * of the normal LL&M code, then simplifying as much as possible.
+ *
+ * 1x1 is trivial: just take the DC coefficient divided by 8.
+ *
+ * See jidctint.c for additional comments.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.  For any other value
+ * the non-C text inside the #if below deliberately breaks the build.
+ */
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling is the same as in jidctint.c. */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13		/* fraction bits kept in the FIX_* constants */
+#define PASS1_BITS  2		/* extra scaling applied to pass-1 outputs */
+#else
+#define CONST_BITS  13		/* same constant precision as the 8-bit case */
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants, each
+ * being the named value times 2**13, rounded to the nearest integer.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+#if CONST_BITS == 13
+#define FIX_0_211164243  ((INT32)  1730)	/* FIX(0.211164243) */
+#define FIX_0_509795579  ((INT32)  4176)	/* FIX(0.509795579) */
+#define FIX_0_601344887  ((INT32)  4926)	/* FIX(0.601344887) */
+#define FIX_0_720959822  ((INT32)  5906)	/* FIX(0.720959822) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_850430095  ((INT32)  6967)	/* FIX(0.850430095) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_061594337  ((INT32)  8697)	/* FIX(1.061594337) */
+#define FIX_1_272758580  ((INT32)  10426)	/* FIX(1.272758580) */
+#define FIX_1_451774981  ((INT32)  11893)	/* FIX(1.451774981) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_2_172734803  ((INT32)  17799)	/* FIX(2.172734803) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_624509785  ((INT32)  29692)	/* FIX(3.624509785) */
+#else
+#define FIX_0_211164243  FIX(0.211164243)
+#define FIX_0_509795579  FIX(0.509795579)
+#define FIX_0_601344887  FIX(0.601344887)
+#define FIX_0_720959822  FIX(0.720959822)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_850430095  FIX(0.850430095)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_061594337  FIX(1.061594337)
+#define FIX_1_272758580  FIX(1.272758580)
+#define FIX_1_451774981  FIX(1.451774981)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_172734803  FIX(2.172734803)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_624509785  FIX(3.624509785)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ * The product is not descaled here; callers apply DESCALE afterwards.
+ */
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ * Only the coefficient is cast; quantval is used with its own type.
+ */
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block: 4 samples are stored into
+ * each of output_buf[0..3], starting at column output_col.
+ */
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*4];	/* 4 rows of DCTSIZE ints between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Column 4 of the work array is neither written here nor read in pass 2. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process column 4, because second pass won't use it */
+    if (ctr == DCTSIZE-4)
+      continue;
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
+	inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine term 4 for 4x4 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      /* NOTE(review): if the product can be negative, "<<" is undefined in ISO C -- confirm */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= (CONST_BITS+1);	/* the extra bit is undone by the "+1" in DESCALE */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
+  }
+  
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero (entry 4 is skipped: pass 1 never stores it) */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
+    
+    tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+	 + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = (INT32) wsptr[7];
+    z2 = (INT32) wsptr[5];
+    z3 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[1];
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block: 2 samples are stored into
+ * each of output_buf[0..1], starting at column output_col.
+ */
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp10, z1;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*2];	/* 2 rows of DCTSIZE ints between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Columns 2, 4 and 6 are neither written here nor read in pass 2. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process columns 2,4,6 */
+    if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6)
+      continue;
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      /* NOTE(review): if the product can be negative, "<<" is undefined in ISO C -- confirm */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 = z1 << (CONST_BITS+2);	/* the 2 extra bits are undone by the "+2" in DESCALE */
+    
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
+  }
+  
+  /* Pass 2: process 2 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero (entries 2,4,6 are skipped: pass 1 never stores them) */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
+    
+    /* Odd part */
+
+    tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
+	 + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+	 + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+	 + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block: a single sample is stored
+ * at output_buf[0][output_col].
+ */
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int dcval;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* We hardly need an inverse DCT routine for this: just take the
+   * average pixel value, which is one-eighth of the DC coefficient.
+   * Only coef_block[0] and the first multiplier-table entry are read.
+   */
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
+  dcval = (int) DESCALE((INT32) dcval, 3);
+  /* Mask with RANGE_MASK before the table lookup, as the other IDCTs do. */
+  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
diff --git a/JPEG/jinclude.h b/JPEG/jinclude.h
new file mode 100644
index 0000000..0a4f151
--- /dev/null
+++ b/JPEG/jinclude.h
@@ -0,0 +1,91 @@
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ *
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+/* bcopy() takes (src, dest); MEMCOPY keeps the (dest, src) order for callers. */
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+/* memset()/memcpy() already take the destination first. */
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif /* NEED_BSD_STRINGS */
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ * The argument is handed to sizeof unchanged and the result cast to size_t.
+ */
+#define SIZEOF(object)	((size_t) sizeof(object))
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ * The item size passed is 1, so each yields a count of bytes transferred.
+ */
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
diff --git a/JPEG/jmemmgr.cpp b/JPEG/jmemmgr.cpp
new file mode 100644
index 0000000..d801b32
--- /dev/null
+++ b/JPEG/jmemmgr.cpp
@@ -0,0 +1,1118 @@
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));
+#endif
+#endif
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of sizeof(ALIGN_TYPE).
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double
+#endif
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr;
+
+typedef union small_pool_struct {
+  struct {
+    small_pool_ptr next;	/* next pool of this class; NULL ends the list */
+    size_t bytes_used;		/* bytes already handed out from this pool */
+    size_t bytes_left;		/* bytes still free for future requests */
+  } hdr;
+  ALIGN_TYPE dummy;		/* placeholder: pads header size to suit ALIGN_TYPE */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr;
+
+typedef union large_pool_struct {
+  struct {
+    large_pool_ptr next;	/* next pool of this class; NULL ends the list */
+    size_t bytes_used;		/* size of the one object held (after rounding) */
+    size_t bytes_left;		/* alloc_large always stores 0 here */
+  } hdr;
+  ALIGN_TYPE dummy;		/* placeholder: pads header size to suit ALIGN_TYPE */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields (code casts cinfo->mem to my_mem_ptr) */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];	/* alloc_small appends at the end */
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];	/* alloc_large pushes on the front */
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are themselves allocated
+   * with alloc_small, i.e. they live somewhere in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* Running total of bytes obtained from jpeg_get_small/large, headers included */
+  long total_space_allocated;
+
+  /* alloc_sarray and alloc_barray store the rows-per-chunk value they chose
+   * here; realize_virt_arrays copies it into the virtual array control block.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer; NULL until realized */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer, in rows */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* must buffer contents be written to backing store? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer; NULL until realized */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer, in rows */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* must buffer contents be written to backing store? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr big;
+  small_pool_ptr little;
+
+  /* Debug-only helper: write straight to stderr instead of using the
+   * trace message machinery, whose parameter array has no room for
+   * long values.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, mgr->total_space_allocated);
+
+  big = mgr->large_list[pool_id];
+  while (big != NULL) {
+    fprintf(stderr, "  Large chunk used %ld\n", (long) big->hdr.bytes_used);
+    big = big->hdr.next;
+  }
+
+  for (little = mgr->small_list[pool_id]; little != NULL;
+       little = little->hdr.next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) little->hdr.bytes_used,
+	    (long) little->hdr.bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(void)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Raise JERR_OUT_OF_MEMORY via ERREXIT1; "which" tags the failing call site */
+/* With MEM_STATS compiled in, trace_level is raised first so stats get reported */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	1600,			/* spare bytes requested with the first PERMANENT pool */
+	16000			/* spare bytes requested with the first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	0,			/* spare bytes for additional PERMANENT pools */
+	5000			/* spare bytes for additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* alloc_small retry floor; greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Carve a "small" object out of the pooled storage for class pool_id. */
+/* Failures are reported through out_of_memory() or ERREXIT1(). */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr pool, tail;
+  char * obj;
+  size_t remainder, needed, slop;
+
+  /* Reject impossible sizes up front so the arithmetic below cannot wrap */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
+    out_of_memory(cinfo, 1);	/* no single chunk could ever hold this */
+
+  /* Pad the size up to the next multiple of SIZEOF(ALIGN_TYPE) */
+  remainder = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (remainder > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - remainder;
+
+  /* Validate the pool class before it is used as an array index */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Walk the class's pool list; tail trails one step behind pool */
+  tail = NULL;
+  for (pool = mgr->small_list[pool_id]; pool != NULL;
+       pool = pool->hdr.next) {
+    if (sizeofobject <= pool->hdr.bytes_left)
+      break;			/* this pool can satisfy the request */
+    tail = pool;
+  }
+
+  if (pool == NULL) {
+    /* Nothing fits, so start a new pool.  "needed" covers the header plus
+     * this object; "slop" is the spare room left over for later requests.
+     */
+    needed = sizeofobject + SIZEOF(small_pool_hdr);
+    slop = (tail == NULL) ? first_pool_slop[pool_id]
+			  : extra_pool_slop[pool_id];
+    /* Keep the total request within MAX_ALLOC_CHUNK */
+    if (slop > (size_t) (MAX_ALLOC_CHUNK-needed))
+      slop = (size_t) (MAX_ALLOC_CHUNK-needed);
+    /* Ask the system layer; on failure halve the slop and retry */
+    while ((pool = (small_pool_ptr) jpeg_get_small(cinfo, needed + slop))
+	   == NULL) {
+      slop /= 2;
+      if (slop < MIN_SLOP)	/* not worth shrinking any further */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mgr->total_space_allocated += needed + slop;
+    /* Fill in the fresh header, then hook the pool onto the list's end */
+    pool->hdr.bytes_left = sizeofobject + slop;
+    pool->hdr.bytes_used = 0;
+    pool->hdr.next = NULL;
+    if (tail != NULL)
+      tail->hdr.next = pool;
+    else
+      mgr->small_list[pool_id] = pool;	/* first pool of its class */
+  }
+
+  /* Place the object right after the bytes already handed out; the pool's
+   * data area begins immediately behind its header.
+   */
+  obj = (char *) (pool + 1) + pool->hdr.bytes_used;
+  pool->hdr.bytes_used += sizeofobject;
+  pool->hdr.bytes_left -= sizeofobject;
+
+  return (void *) obj;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object in a dedicated pool of class pool_id */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr pool;
+  size_t remainder, total;
+
+  /* Reject impossible sizes up front so the arithmetic below cannot wrap */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
+    out_of_memory(cinfo, 3);	/* no single chunk could ever hold this */
+
+  /* Pad the size up to the next multiple of SIZEOF(ALIGN_TYPE) */
+  remainder = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (remainder > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - remainder;
+
+  /* Validate the pool class before it is used as an array index */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Every large object gets a pool of its own: header plus payload */
+  total = sizeofobject + SIZEOF(large_pool_hdr);
+  pool = (large_pool_ptr) jpeg_get_large(cinfo, total);
+  if (pool == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mgr->total_space_allocated += total;
+
+  /* The byte counts are kept only for statistics; allocation never
+   * consults them.  The new pool is pushed on the front of the list.
+   */
+  pool->hdr.bytes_used = sizeofobject;
+  pool->hdr.bytes_left = 0;
+  pool->hdr.next = mgr->large_list[pool_id];
+  mgr->large_list[pool_id] = pool;
+
+  return (void FAR *) (pool + 1); /* payload starts right after the header */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Build a 2-D sample array: row pointers small, sample rows large */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY rows;
+  JSAMPROW chunk;
+  JDIMENSION chunkrows, filled, k;
+  long maxrows;
+
+  /* How many whole rows fit into one allocation chunk?  A quotient of
+   * zero or less means a single row is already too wide to allocate.
+   */
+  maxrows = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	    ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (maxrows <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  chunkrows = (maxrows < (long) numrows) ? (JDIMENSION) maxrows : numrows;
+  /* Published so that virtual-array code can record the chunking */
+  mgr->last_rowsperchunk = chunkrows;
+
+  /* Row pointer table comes from the small pools */
+  rows = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				  (size_t) (numrows * SIZEOF(JSAMPROW)));
+
+  /* Sample storage comes from large pools, one chunk of rows per request */
+  for (filled = 0; filled < numrows; ) {
+    chunkrows = MIN(chunkrows, numrows - filled);
+    chunk = (JSAMPROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) chunkrows * (size_t) samplesperrow
+		  * SIZEOF(JSAMPLE)));
+    /* Point the next chunkrows table entries at successive rows */
+    for (k = 0; k < chunkrows; k++) {
+      rows[filled++] = chunk;
+      chunk += samplesperrow;
+    }
+  }
+
+  return rows;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Build a 2-D coefficient-block array: row pointers small, rows large */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY rows;
+  JBLOCKROW chunk;
+  JDIMENSION chunkrows, filled, k;
+  long maxrows;
+
+  /* How many whole rows fit into one allocation chunk?  A quotient of
+   * zero or less means a single row is already too wide to allocate.
+   */
+  maxrows = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	    ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (maxrows <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  chunkrows = (maxrows < (long) numrows) ? (JDIMENSION) maxrows : numrows;
+  /* Published so that virtual-array code can record the chunking */
+  mgr->last_rowsperchunk = chunkrows;
+
+  /* Row pointer table comes from the small pools */
+  rows = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				   (size_t) (numrows * SIZEOF(JBLOCKROW)));
+
+  /* Block storage comes from large pools, one chunk of rows per request */
+  for (filled = 0; filled < numrows; ) {
+    chunkrows = MIN(chunkrows, numrows - filled);
+    chunk = (JBLOCKROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) chunkrows * (size_t) blocksperrow
+		  * SIZEOF(JBLOCK)));
+    /* Point the next chunkrows table entries at successive rows */
+    for (k = 0; k < chunkrows; k++) {
+      rows[filled++] = chunk;
+      chunk += blocksperrow;
+    }
+  }
+
+  return rows;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D sample array; its buffer is allocated later */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr ctl;
+
+  /* Virtual arrays exist only in the IMAGE lifetime class */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* The control block itself is a small-pool object */
+  ctl = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
+				       SIZEOF(struct jvirt_sarray_control));
+
+  ctl->rows_in_array = numrows;
+  ctl->samplesperrow = samplesperrow;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* no buffer until the array is realized */
+  ctl->b_s_open = FALSE;	/* no backing-store object yet */
+  ctl->next = mgr->virt_sarray_list; /* push on front of the sarray list */
+  mgr->virt_sarray_list = ctl;
+
+  return ctl;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D coefficient-block array; buffer comes later */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr ctl;
+
+  /* Virtual arrays exist only in the IMAGE lifetime class */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* The control block itself is a small-pool object */
+  ctl = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
+				       SIZEOF(struct jvirt_barray_control));
+
+  ctl->rows_in_array = numrows;
+  ctl->blocksperrow = blocksperrow;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* no buffer until the array is realized */
+  ctl->b_s_open = FALSE;	/* no backing-store object yet */
+  ctl->next = mgr->virt_barray_list; /* push on front of the barray list */
+  mgr->virt_barray_list = ctl;
+
+  return ctl;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate in-memory buffers (plus backing store where needed) for all unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long space_per_minheight, maximum_space, avail_mem;
+  long minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * Both totals are passed to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) sptr->maxaccess *
+			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      maximum_space += (long) sptr->rows_in_array *
+		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) bptr->maxaccess *
+			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      maximum_space += (long) bptr->rows_in_array *
+		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;	/* huge value; intended to make every fit test below pass */
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;	/* rows_in_array / maxaccess, rounded up */
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk;	/* chunking chosen by the alloc_sarray call above */
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;	/* rows_in_array / maxaccess, rounded up */
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk;	/* chunking chosen by the alloc_barray call above */
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Move a virtual sample array's buffer to (writing) or from backing store */
+{
+  long rowbytes, offset, nbytes, nrows, absrow, r;
+
+  rowbytes = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  offset = ptr->cur_start_row * rowbytes;
+  /* mem_buffer is made of separately allocated chunks; do one per pass */
+  for (r = 0; r < (long) ptr->rows_in_mem; r += ptr->rowsperchunk) {
+    absrow = (long) ptr->cur_start_row + r;	/* row # within whole array */
+    /* Start from a full chunk, shortened if the buffer ends mid-chunk */
+    nrows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - r);
+    /* Stop at the first row that has never been defined */
+    nrows = MIN(nrows, (long) ptr->first_undef_row - absrow);
+    /* Stop at the end of the array, i.e. the end of the file */
+    nrows = MIN(nrows, (long) ptr->rows_in_array - absrow);
+    if (nrows <= 0)		/* nothing left that is worth transferring */
+      break;
+    nbytes = nrows * rowbytes;
+    if (! writing)
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[r],
+					   offset, nbytes);
+    else
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[r],
+					    offset, nbytes);
+    offset += nbytes;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Move a virtual block array's buffer to (writing) or from backing store */
+{
+  long rowbytes, offset, nbytes, nrows, absrow, r;
+
+  rowbytes = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  offset = ptr->cur_start_row * rowbytes;
+  /* mem_buffer is made of separately allocated chunks; do one per pass */
+  for (r = 0; r < (long) ptr->rows_in_mem; r += ptr->rowsperchunk) {
+    absrow = (long) ptr->cur_start_row + r;	/* row # within whole array */
+    /* Start from a full chunk, shortened if the buffer ends mid-chunk */
+    nrows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - r);
+    /* Stop at the first row that has never been defined */
+    nrows = MIN(nrows, (long) ptr->first_undef_row - absrow);
+    /* Stop at the end of the array, i.e. the end of the file */
+    nrows = MIN(nrows, (long) ptr->rows_in_array - absrow);
+    if (nrows <= 0)		/* nothing left that is worth transferring */
+      break;
+    nbytes = nrows * rowbytes;
+    if (! writing)
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[r],
+					   offset, nbytes);
+    else
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[r],
+					    offset, nbytes);
+    offset += nbytes;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Make rows [start_row, start_row+num_rows) of a virtual sample array */
+/* accessible and return a pointer to the first of them.  writable is true */
+/* if the caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* Sanity check; end_row < start_row means the sum above wrapped around */
+  if (end_row < start_row || end_row > ptr->rows_in_array ||
+      num_rows > ptr->maxaccess || ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;	/* rows below end_row now count as defined */
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return the row-pointer entry for start_row within the current window */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Make rows start_row .. start_row+num_rows-1 of a virtual block array */
+/* resident in memory and return a pointer to the first of them. */
+/* writable is true if caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;	/* one past the last row wanted */
+  JDIMENSION undef_row;		/* first requested row that may need zeroing */
+
+  /* debugging check; NOTE(review): the unsigned sum end_row could wrap */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)	/* window must move, but no backing store is open */
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)		/* a writer defines everything up to end_row */
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+/* Free all small and large objects of pool pool_id; when that is the */
+/* IMAGE pool, the backing stores of virtual arrays are closed first. */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr big, big_next;
+  small_pool_ptr sml, sml_next;
+  size_t nbytes;
+
+  /* safety check: refuse pool ids outside 0 .. JPOOL_NUMPOOLS-1 */
+  if (! (pool_id >= 0 && pool_id < JPOOL_NUMPOOLS))
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)	/* report this pool's memory usage */
+    print_mem_stats(cinfo, pool_id);
+#endif
+
+  /* Virtual arrays belong to the IMAGE pool: close their backing stores */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sa;
+    jvirt_barray_ptr ba;
+
+    sa = mgr->virt_sarray_list;
+    while (sa != NULL) {
+      if (sa->b_s_open) {	/* skip arrays that have no backing store */
+	sa->b_s_open = FALSE;	/* clear first: no recursive close on error */
+	(*sa->b_s_info.close_backing_store) (cinfo, & sa->b_s_info);
+      }
+      sa = sa->next;
+    }
+    mgr->virt_sarray_list = NULL;
+
+    ba = mgr->virt_barray_list;
+    while (ba != NULL) {
+      if (ba->b_s_open) {	/* skip arrays that have no backing store */
+	ba->b_s_open = FALSE;	/* clear first: no recursive close on error */
+	(*ba->b_s_info.close_backing_store) (cinfo, & ba->b_s_info);
+      }
+      ba = ba->next;
+    }
+    mgr->virt_barray_list = NULL;
+  }
+
+  /* Large objects: unhook the chain from the manager, then free each node */
+  big = mgr->large_list[pool_id];
+  mgr->large_list[pool_id] = NULL;
+  for (; big != NULL; big = big_next) {
+    big_next = big->hdr.next;	/* read the link before the node is gone */
+    nbytes = big->hdr.bytes_used + big->hdr.bytes_left + SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) big, nbytes);
+    mgr->total_space_allocated -= nbytes;
+  }
+
+  /* Small objects: same procedure on the small-pool chain */
+  sml = mgr->small_list[pool_id];
+  mgr->small_list[pool_id] = NULL;
+  for (; sml != NULL; sml = sml_next) {
+    sml_next = sml->hdr.next;	/* read the link before the node is gone */
+    nbytes = sml->hdr.bytes_used + sml->hdr.bytes_left + SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) sml, nbytes);
+    mgr->total_space_allocated -= nbytes;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool = JPOOL_NUMPOOLS;
+
+  /* Empty every pool, highest-numbered first and JPOOL_PERMANENT last;
+   * free_pool also closes backing store when it handles the IMAGE pool.
+   * The reverse order might help avoid fragmentation with some malloc
+   * libraries.
+   */
+  while (--pool >= JPOOL_PERMANENT)
+    free_pool(cinfo, pool);
+
+  /* The control block itself is the last allocation to go */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* so this routine cannot run a second time */
+
+  jpeg_mem_term(cinfo);		/* hand over to system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+/* Create the memory manager and publish it as cinfo->mem; takes an */
+/* error exit on misconfiguration or if the control block can't be had. */
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.  SIZEOF(ALIGN_TYPE) should be a power
+   * of 2: for X>0, X and X-1 have no one-bits in common iff X is one.
+   * Compilers may give an "unreachable code" warning on these checks.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t and a multiple of
+   * SIZEOF(ALIGN_TYPE); a "constant too large" warning means fix the macro.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK;
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = & mem->pub;
+
+  /* An environment variable JPEGMEM, if set, overrides the default from
+   * jpeg_mem_init (the application may override it again later).  The
+   * value counts thousands of bytes, or millions with an 'm'/'M' suffix.
+   * Define NO_GETENV if your system doesn't support getenv().
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	long scale = (ch == 'm' || ch == 'M') ? 1000000L : 1000L;
+	long limit = (long) (~0UL >> 1) / scale;	/* LONG_MAX / scale */
+	/* Clamp so that the scaling cannot overflow a (possibly 32-bit) long */
+	if (max_to_use > limit)
+	  max_to_use = limit;
+	else if (max_to_use < -limit)
+	  max_to_use = -limit;
+	mem->pub.max_memory_to_use = max_to_use * scale;
+      }
+    }
+  }
+#endif
+}
diff --git a/JPEG/jmemnobs.cpp b/JPEG/jmemnobs.cpp
new file mode 100644
index 0000000..eb8c337
--- /dev/null
+++ b/JPEG/jmemnobs.cpp
@@ -0,0 +1,109 @@
+/*
+ * jmemnobs.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ * Note that the max_memory_to_use option is ignored by this implementation.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);	/* plain malloc(); cinfo is unused */
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);			/* cinfo and sizeofobject are unused */
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject); /* same malloc() as small objects */
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);			/* cinfo and sizeofobject are unused */
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * Here we always say, "we got all you want bud!"
+ */
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return max_bytes_needed;	/* grant the full request; other args are unused */
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE);	/* info is never filled in here */
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return 0;			/* caller stores this as default max_memory_to_use */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work: this implementation needs no system-dependent cleanup */
+}
diff --git a/JPEG/jmemsys.h b/JPEG/jmemsys.h
new file mode 100644
index 0000000..6c3c6d3
--- /dev/null
+++ b/JPEG/jmemsys.h
@@ -0,0 +1,198 @@
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* I/O methods; jpeg_open_backing_store must fill in all three pointers */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields for system-dependent backing-store management */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else /* not USE_MSDOS_MEMMGR */
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else /* neither USE_MSDOS_MEMMGR nor USE_MAC_MEMMGR */
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif /* USE_MAC_MEMMGR */
+#endif /* USE_MSDOS_MEMMGR */
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/JPEG/jmorecfg.h b/JPEG/jmorecfg.h
new file mode 100644
index 0000000..04c6ac6
--- /dev/null
+++ b/JPEG/jmorecfg.h
@@ -0,0 +1,366 @@
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255
+#define CENTERJSAMPLE	128
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	4095
+#define CENTERJSAMPLE	2048
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+// Local change: INT32 was "long"; the int below is assumed to be >= 32 bits.
+typedef int INT32;
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)		static type
+/* a function used only in its module: */
+#define LOCAL(type)		static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifdef NEED_FAR_POINTERS
+#define FAR  far
+#else
+#define FAR
+#endif
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+// WARNING (local change by Misha): boolean is unsigned char here, not int.
+#ifndef HAVE_BOOLEAN
+//typedef int boolean;
+typedef unsigned char boolean;
+#endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Arithmetic coding is unsupported for legal reasons.  Complaints to IBM. */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#undef  C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected 12-bit data precision, it is dangerous to turn off
+ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
+ * precision, so jchuff.c normally uses entropy optimization to compute
+ * usable tables for higher precision.  If you don't want to do optimization,
+ * you'll have to supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#undef  D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * change these macros.  You can also deal with formats such as R,G,B,X
+ * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
+ * the offsets will also change the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not
+ *    useful if you are using JPEG color spaces other than YCbCr or grayscale.
+ * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).  So you
+ *    can't use color quantization if you change that value.
+ */
+
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT
+#ifdef HAVE_PROTOTYPES
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/JPEG/jpegint.h b/JPEG/jpegint.h
new file mode 100644
index 0000000..95b00d4
--- /dev/null
+++ b/JPEG/jpegint.h
@@ -0,0 +1,392 @@
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Buffer-controller modes (the pass_mode of start_pass) */
+	JBUF_PASS_THRU,		/* Plain stripwise operation (first enumerator, value 0) */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field; keep the numeric order (jdapi.c depends on it!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress */
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Compression master control module: three per-pass methods plus two flags */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if the pass_startup method must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Compression main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control); pre_process_data
+ * takes input rows plus a row counter and output row groups plus a group counter */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Compression coefficient buffer control; compress_data reports a boolean */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion (compression side: sample rows in, sample image out) */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling (compression side) */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if rows above & below are needed too */
+};
+
+/* Forward DCT (also controls coefficient quantization) */
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* perhaps this should be an array (cf. inverse_DCT[] in jpeg_inverse_dct)??? */
+  JMETHOD(void, forward_DCT, (j_compress_ptr cinfo,
+			      jpeg_component_info * compptr,
+			      JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+			      JDIMENSION start_row, JDIMENSION start_col,
+			      JDIMENSION num_blocks));
+};
+
+/* Entropy encoding; start_pass takes a gather_statistics flag */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing: five fixed header/trailer writers plus two raw-marker hooks */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* The two routines below are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Decompression master control module: output-pass methods plus one flag */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module (decompression): four methods plus two status flags */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Decompression main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Decompression coefficient buffer control: separate input and output passes */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays; NULL when there are none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control); post_process_data
+ * takes input row groups plus a group counter and output rows plus a row counter */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing: methods followed by reader state fields */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* has an SOI marker been found? */
+  boolean saw_SOF;		/* has an SOF marker been found? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding: two methods plus one shared warning flag */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
+				JBLOCKROW *MCU_data));
+
+  /* This is here to share code between baseline and progressive decoders; */
+  /* other modules probably should not use it */
+  boolean insufficient_data;	/* set TRUE once the warning has been emitted */
+};
+
+/* Inverse DCT (also performs dequantization): type of one IDCT method entry */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* One entry per component slot, so each component can have its own IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that the upsampler must also call the color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if rows above & below are needed too */
+};
+
+/* Colorspace conversion (decompression side: sample image in, sample rows out) */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction (decompression side) */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Miscellaneous useful macros; any earlier MAX/MIN is replaced, args may be evaluated twice */
+
+#undef MAX
+#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#undef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity, as with "arithmetic shift" instructions
+ * that shift in copies of the sign bit.  Some C compilers implement >> with
+ * an unsigned shift; there you must define RIGHT_SHIFT_IS_UNSIGNED, which ORs
+ * the sign bits back in (the mask hard-codes a 32-bit width: "32-(shft)").
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED	/* NOTE(review): mask left-shifts ~0; undefined in ISO C if INT32 is signed -- confirm */
+#define SHIFT_TEMPS	INT32 shift_temp;	/* temporary that RIGHT_SHIFT assigns x to */
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS		/* expands to nothing: no temporary needed */
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+/* Short aliases for the external names below, for linkers that need them. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_phuff_encoder	jIPHEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_phuff_decoder	jIPHDecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jzero_far		jZeroFar
+#define jpeg_zigzag_order	jZIGTable	/* its declaration in this header is under #if 0 */
+#define jpeg_natural_order	jZAGTable
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Compression module initialization routines (each takes the j_compress_ptr) */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines (each takes the j_decompress_ptr) */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+/* Memory manager initialization (takes the common pointer type, j_common_ptr) */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines in jutils.c */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
+				    JSAMPARRAY output_array, int dest_row,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+/* Constant tables in jutils.c (only jpeg_natural_order is actually declared) */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+
+/* Suppress undefined-structure complaints by defining placeholder structs. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+#endif
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/JPEG/jpeglib.h b/JPEG/jpeglib.h
new file mode 100644
index 0000000..d1be8dd
--- /dev/null
+++ b/JPEG/jpeglib.h
@@ -0,0 +1,1096 @@
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+/* Version ID for the JPEG library, as a plain integer.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
+ */
+
+#define JPEG_LIB_VERSION  62	/* Version 6b */
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard, so don't change them
+ * if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 samples */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU	/* may already have been set, e.g. in jconfig.h */
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's default limit on blocks per MCU */
+#endif
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory (hence FAR on rows only).
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of DCTSIZE2 coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* ptr to a single coefficient; useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables (one step value per block element). */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE once the table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables (code-length counts plus the symbol list). */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits (k = 1..16); bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE once the table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel): public, then private fields. */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+  
+  /* Remaining fields should be treated as private by applications. */
+  
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a DCT block in samples.  Always DCTSIZE for compression.
+   * For decompression this is the size of the output from one DCT block,
+   * reflecting any scaling we choose to apply during the IDCT step.
+   * Values of 1,2,4,8 are likely to be supported.  Note that different
+   * components may receive different IDCT scalings.
+   */
+  int DCT_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface), thus
+   * downsampled_width = ceil(image_width * Hi/Hmax)
+   * and similarly for height.  For decompression, IDCT scaling is included, so
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
+   */
+  JDIMENSION downsampled_width;	 /* actual (unpadded) width in samples */
+  JDIMENSION downsampled_height; /* actual (unpadded) height in samples */
+  /* This flag is used only for decompression.  In cases where some of the
+   * components will be ignored (eg grayscale output from YCbCr image),
+   * we can skip most computations for the unused components.
+   */
+  boolean component_needed;	/* do we need the value of this component? */
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* MCU_width * MCU_height */
+  int MCU_sample_width;		/* MCU width in samples, MCU_width*DCT_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem (untyped pointer). */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these (one per scan): */
+
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a linked list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* the marker length word is counted in neither data_length nor original_length */
+};
+
+/* Known color spaces (used for in_color_space and jpeg_color_space). */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified (first enumerator, value 0) */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK		/* Y/Cb/Cr/K */
+} J_COLOR_SPACE;
+
+/* DCT/IDCT algorithm options, plus overridable default and fastest choices. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif
+
+/* Dithering options for decompression (three modes). */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering (first enumerator, value 0) */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Fields shared, in this order, by the compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* Call-sequence check; the user of the macro adds the ';' */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;	/* view of either master struct */
+typedef struct jpeg_compress_struct * j_compress_ptr;
+typedef struct jpeg_decompress_struct * j_decompress_ptr;
+
+
+/* Master record for a compression instance: parameters, state and module links */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+  
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+  
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+  
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+  
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height-1 while rows remain to be written */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCTSIZE sample rows of each component in an
+   * "iMCU" (interleaved MCU) row.
+   */
+  
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+  
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+  
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;	/* NOTE(review): presumably the capacity of script_space -- confirm */
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;	/* (minor part of the version number) */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_scaled_size;	/* smallest DCT_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;		/* 0, or code of a pending JPEG marker */
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(void, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+  
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;			/* ID code of the saved message */
+#define JMSG_STR_PARM_MAX  80	/* size of msg_parm.s below, in chars */
+  union {
+    int i[8];			/* either up to 8 int parameters ... */
+    char s[JMSG_STR_PARM_MAX];	/* ... or one string parameter */
+  } msg_parm;
+  
+  /* Standard state variables for error facility */
+  
+  int trace_level;		/* max msg_level that will be displayed */
+  
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  /* Progress-report method; it is passed the JPEG object (either flavor). */
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+  /* Counters describing how far processing has advanced: */
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  /* Methods of this object; each is passed the compression object. */
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+  /* Methods.  jpeg_resync_to_restart() is the default resync_to_restart. */
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2	/* number of pool IDs defined above */
+/* Pointers to the control blocks of virtual sample / block arrays: */
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
+typedef struct jvirt_barray_control * jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers.  pool_id selects one of the JPOOL_xxx pools. */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods (the
+ * kind of routine passed to jpeg_set_marker_processor).  Need not pass marker
+ * code since it is stored in cinfo->unread_marker. */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * The JPP macro hides prototype parameters from compilers that can't cope.
+ * Note JPP requires double parentheses.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist	/* keep the parameter list as written */
+#else
+#define JPP(arglist)	()	/* drop the parameter list entirely */
+#endif /* HAVE_PROTOTYPES */
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in fewer than 15
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES	/* map each public name to a short alias */
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression and decompression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression and decompression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.doc concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* else these are defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/JPEG/jquant1.cpp b/JPEG/jquant1.cpp
new file mode 100644
index 0000000..b2f96aa
--- /dev/null
+++ b/JPEG/jquant1.cpp
@@ -0,0 +1,856 @@
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: ODITHER_SIZE must be a power of 2, or the ODITHER_MASK wraparound breaks */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];	/* one whole dither table */
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];	/* points at rows of such a table */
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1;
+   * make_odither_array() rescales them into signed dither offsets. */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8	/* error types are chosen by sample precision */
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else				/* any other sample precision */
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields; first member, so the struct can be cast to/from cinfo->cquantize */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map, indexed [component][color index] */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component (entries may be shared) */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors, output_width+2 entries each */
+  boolean on_odd_row;		/* TRUE while processing a right-to-left row */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;	/* what cinfo->cquantize is cast to in this module */
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Decide how many levels each output component gets and store the */
+/* counts in Ncolors[]; desired_number_of_colors is the budget. */
+/* Return value is total number of colors (product of Ncolors[] values). */
+{
+  int nc = cinfo->out_color_components; /* number of color components */
+  int max_colors = cinfo->desired_number_of_colors;
+  int total_colors, iroot, i, j;
+  boolean changed;		/* did the current pass raise any count? */
+  long temp;			/* wide temporary for products of counts */
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+
+  /* We can allocate at least the nc'th root of max_colors per component. */
+  /* Compute floor(nc'th root of max_colors). */
+  iroot = 1;
+  do {
+    iroot++;
+    temp = iroot;		/* set temp = iroot ** nc */
+    for (i = 1; i < nc; i++)
+      temp *= iroot;
+  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
+  iroot--;			/* now iroot = floor(root) */
+
+  /* Must have at least 2 color values per component */
+  if (iroot < 2)		/* here temp == 2**nc, the smallest usable total */
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
+
+  /* Initialize to iroot color values for each component */
+  total_colors = 1;
+  for (i = 0; i < nc; i++) {
+    Ncolors[i] = iroot;
+    total_colors *= iroot;
+  }
+  /* We may be able to increment the count for one or more components without
+   * exceeding max_colors, though we know not all can be incremented.
+   * Sometimes, the first component can be incremented more than once!
+   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
+   * In RGB colorspace, try to increment G first, then R, then B.
+   */
+  do {
+    changed = FALSE;
+    for (i = 0; i < nc; i++) {
+      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
+      /* calculate new total_colors if Ncolors[j] is incremented */
+      temp = total_colors / Ncolors[j];
+      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      if (temp > (long) max_colors)
+	break;			/* won't fit, done with this pass */
+      Ncolors[j]++;		/* OK, apply the increment */
+      total_colors = (int) temp;
+      changed = TRUE;
+    }
+  } while (changed);		/* stop after a pass that changed nothing */
+
+  return total_colors;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Representative sample value for grid step j, where 0 <= j <= maxj. */
+/* Results increase with j and stay within 0..MAXJSAMPLE. */
+{
+  /* Steps are spread evenly over 0..MAXJSAMPLE inclusive, so both extremes
+   * are always present and dithering cannot produce an out-of-gamut color.
+   */
+  INT32 scaled = (INT32) j * MAXJSAMPLE;
+  INT32 rounded = scaled + maxj/2;	/* bias so the division rounds */
+  return (int) (rounded / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Upper input bound of the range assigned to output step j; the result */
+/* is >= 0 for j=0 and >= MAXJSAMPLE for j=maxj. */
+{
+  INT32 numer = (INT32) (2*j + 1) * MAXJSAMPLE + maxj; /* doubled midpoint */
+  return (int) (numer / (2*maxj));	/* halfway between output_value steps */
+}
+
+
+/* Create the colormap: select_ncolors() picks the per-component level
+ * counts, then every component row is filled in row-major order and the
+ * result is stored in the quantizer's sv_colormap / sv_actual fields. */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colormap;		/* Created colormap */
+  int total_colors;		/* Number of distinct output colors */
+  int i,j,k, nci, blksize, blkdist, ptr, val;	/* i=component, j=level, ptr/k=map position */
+
+  /* Select number of colors for each component */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Report selected color counts */
+  if (cinfo->out_color_components == 3)
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+	     total_colors, cquantize->Ncolors[0],
+	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+  else
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+
+  /* Allocate and fill in the colormap. */
+  /* The colors are ordered in the map in standard row-major order, */
+  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
+
+  colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  /* blkdist is distance between groups of identical entries for a component */
+  blkdist = total_colors;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colormap entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      /* Compute j'th output value (out of nci) for component */
+      val = output_value(cinfo, i, j, nci-1);
+      /* Fill in all colormap entries that have this value of this component */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+	/* fill in blksize entries beginning at ptr */
+	for (k = 0; k < blksize; k++)
+	  colormap[i][ptr+k] = (JSAMPLE) val;
+      }
+    }
+    blkdist = blksize;		/* blksize of this color is blkdist of next */
+  }
+
+  /* Save the colormap in private storage,
+   * where it will survive color quantization mode changes.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/* Create the color index table: for each component, map every sample value
+ * to its premultiplied colormap index, with extra padding on both sides of
+ * the table when ordered dithering is selected. */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW indexptr;		/* current component's table, after re-basing */
+  int i,j,k, nci, blksize, val, pad;
+
+  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
+   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
+   * This is not necessary in the other dithering modes.  However, we
+   * flag whether it was done in case user changes dithering mode.
+   */
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    pad = MAXJSAMPLE*2;
+    cquantize->is_padded = TRUE;
+  } else {
+    pad = 0;
+    cquantize->is_padded = FALSE;
+  }
+
+  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + pad),
+     (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colorindex entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blksize / nci;
+
+    /* adjust colorindex pointers to provide padding at negative indexes. */
+    if (pad)
+      cquantize->colorindex[i] += MAXJSAMPLE;
+
+    /* in loop, val = index of current output value, */
+    /* and k = largest j that maps to current val */
+    indexptr = cquantize->colorindex[i];
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci-1);
+    for (j = 0; j <= MAXJSAMPLE; j++) {
+      while (j > k)		/* advance val if past boundary */
+	k = largest_input_value(cinfo, i, ++val, nci-1);
+      /* premultiply so that no multiplication needed in main processing */
+      indexptr[j] = (JSAMPLE) (val * blksize);
+    }
+    /* Pad at both ends if necessary */
+    if (pad)
+      for (j = 1; j <= MAXJSAMPLE; j++) {
+	indexptr[-j] = indexptr[0];	/* below range: replicate first entry */
+	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];	/* above: replicate last */
+      }
+  }
+}
+
+
+/* Allocate (JPOOL_IMAGE) and fill an ordered-dither array for a component
+ * having ncolors distinct output values.  Each base_dither_matrix entry is
+ * scaled to a signed offset of at most half the spacing between outputs.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;	/* table being built; also the return value */
+  int j,k;
+  INT32 num,den;		/* numerator and denominator of one dither value */
+
+  odither = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(ODITHER_MATRIX));
+  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
+   * Hence the dither value for the matrix cell with fill order f
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
+   * On 16-bit-int machine, be careful to avoid overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (j = 0; j < ODITHER_SIZE; j++) {
+    for (k = 0; k < ODITHER_SIZE; k++) {
+      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
+	    * MAXJSAMPLE;
+      /* Ensure round towards zero despite C's lack of consistency
+       * about rounding negative values in integer division...
+       */
+      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  int comp, prev;
+
+  for (comp = 0; comp < cinfo->out_color_components; comp++) {
+    int levels = cq->Ncolors[comp];	/* level count of this component */
+    ODITHER_MATRIX_PTR table = NULL;	/* reuse candidate, if any */
+
+    for (prev = 0; prev < comp; prev++) {
+      if (cq->Ncolors[prev] != levels)
+	continue;
+      table = cq->odither[prev];	/* earlier component, same levels */
+      break;
+    }
+    if (table == NULL)		/* nothing to share: build one */
+      table = make_odither_array(cinfo, levels);
+    cq->odither[comp] = table;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping for any number of output components */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY lut = cq->colorindex;	/* per-component lookup tables */
+  int ncomp = cinfo->out_color_components;
+  JDIMENSION ncols = cinfo->output_width;
+  int r;
+
+  for (r = 0; r < num_rows; r++) {
+    JSAMPROW src = input_buf[r];	/* interleaved input samples */
+    JSAMPROW dst = output_buf[r];	/* one colormap index per pixel */
+    JDIMENSION x;
+
+    for (x = 0; x < ncols; x++) {
+      int code = 0, c;
+
+      /* the premultiplied table entries simply add up to the index */
+      for (c = 0; c < ncomp; c++)
+	code += GETJSAMPLE(lut[c][GETJSAMPLE(*src++)]);
+      dst[x] = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		 JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping specialised for exactly three output components */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW lut0 = cq->colorindex[0];	/* table for component 0 */
+  JSAMPROW lut1 = cq->colorindex[1];	/* table for component 1 */
+  JSAMPROW lut2 = cq->colorindex[2];	/* table for component 2 */
+  JDIMENSION ncols = cinfo->output_width;
+  int r;
+
+  for (r = 0; r < num_rows; r++) {
+    JSAMPROW src = input_buf[r];
+    JSAMPROW dst = output_buf[r];
+    JDIMENSION remaining = ncols;
+
+    while (remaining-- > 0) {
+      int code = GETJSAMPLE(lut0[GETJSAMPLE(src[0])]);
+      code += GETJSAMPLE(lut1[GETJSAMPLE(src[1])]);
+      code += GETJSAMPLE(lut2[GETJSAMPLE(src[2])]);
+      src += 3;				/* step to the next pixel triple */
+      *dst++ = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		     JSAMPARRAY output_buf, int num_rows)
+/* General case, with ordered dithering; one pass per component per row */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;	/* colorindex table of the current component */
+  int * dither;			/* points to active row of dither matrix */
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void FAR *) output_buf[row],
+	      (size_t) (width * SIZEOF(JSAMPLE)));
+    row_index = cquantize->row_index;
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;	/* first sample of component ci */
+      output_ptr = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      for (col = width; col > 0; col--) {
+	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+	 * select output value, accumulate into output code for this pixel.
+	 * Range-limiting need not be done explicitly, as we have extended
+	 * the colorindex table to produce the right answers for out-of-range
+	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+	 * required amount of padding.
+	 */
+	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+	input_ptr += nc;	/* samples are interleaved: skip other components */
+	output_ptr++;
+	col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Advance row index for next row */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;	/* persists across calls */
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		      JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;		/* sum of the three premultiplied indexes */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int * dither0;		/* points to active row of dither matrix */
+  int * dither1;		/* same, for component 1 */
+  int * dither2;		/* same, for component 2 */
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    input_ptr = input_buf[row];
+    output_ptr = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+    col_index = 0;
+
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
+					dither0[col_index]]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
+					dither1[col_index]]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
+					dither2[col_index]]);
+      *output_ptr++ = (JSAMPLE) pixcode;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    row_index = (row_index + 1) & ODITHER_MASK;	/* wrap within the matrix */
+    cquantize->row_index = row_index;	/* persists across calls */
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* General case, with Floyd-Steinberg dithering; one pass per component per row */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;		/* twice the pixel error; steps cur to 3x, 5x, 7x */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;	/* colorindex table of the current component */
+  JSAMPROW colormap_ci;		/* colormap row of the current component */
+  int pixcode;			/* this component's premultiplied index contribution */
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void FAR *) output_buf[row],
+	      (size_t) (width * SIZEOF(JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* work right to left in this row */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* work left to right in this row */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Select output value, accumulate into output code for this pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);	/* flip direction for next row */
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  /* one error slot per output column plus one extra slot at each end */
+  size_t nbytes = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  int comp = 0;
+  while (comp < cinfo->out_color_components) {
+    cq->fserrors[comp] = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, nbytes);
+    comp++;
+  }
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{				/* note: is_pre_scan is not consulted in this body */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;		/* byte size of one fserrors[] array */
+  int i;
+
+  /* Install my colormap. */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Initialize for desired dithering mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = color_quantize3;
+    else
+      cquantize->pub.color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub.color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables if we didn't already. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
+    /* Allocate Floyd-Steinberg workspace if didn't already. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Initialize the propagated errors to zero. */
+    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      jzero_far((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:			/* any other dither_mode value */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* intentionally empty: no end-of-pass work in the 1-pass case */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);	/* always an error: this hook should not be reached here */
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;	/* new quantizer object, installed in cinfo below */
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
+  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+
+  /* Make sure my internal arrays won't overflow */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Create the colormap and color index table. */
+  create_colormap(cinfo);	/* must precede create_colorindex: sets Ncolors[], sv_actual */
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if requested.
+   * We do this now since it is FAR storage and may affect the memory
+   * manager's space calculations.  If the user changes to FS dither
+   * mode in a later pass, we will allocate the space then, and will
+   * possibly overrun the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/JPEG/jquant2.cpp b/JPEG/jquant2.cpp
new file mode 100644
index 0000000..af601e3
--- /dev/null
+++ b/JPEG/jquant2.cpp
@@ -0,0 +1,1310 @@
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* scale R distances by this much */
+#define G_SCALE 3		/* scale G distances by this much */
+#define B_SCALE 1		/* and B by this much */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0		/* R,G,B order: component 0 is red */
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0		/* B,G,R order: component 0 is blue */
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1		/* green is the middle component in both supported orders */
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2		/* B,G,R order: component 2 is red */
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2		/* R,G,B order: component 2 is blue */
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)	/* 32 with the 5 bits chosen above */
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)	/* 64 with the 6 bits chosen above */
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)	/* 32 with the 5 bits chosen above */
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)	/* 3 when BITS_IN_JSAMPLE is 8 */
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)	/* 2 when BITS_IN_JSAMPLE is 8 */
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)	/* 3 when BITS_IN_JSAMPLE is 8 */
+
+
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
+typedef hist2d * hist3d;	/* type for top-level pointer */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields; first member, since cinfo->cquantize is cast to my_cquantize_ptr */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram (re-used as the inverse colormap cache in pass 2) */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;	/* the routines in this module obtain it by casting cinfo->cquantize */
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW ptr;			/* walks the 3-sample pixels of the current row */
+  histptr histp;		/* histogram cell hit by the current pixel */
+  hist3d histogram = cquantize->histogram;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptr = input_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the histogram */
+      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+      /* Saturating increment: if the count wrapped, step back to the max. */
+      if (++(*histp) <= 0)
+	(*histp)--;
+      ptr += 3;			/* advance to the next pixel (three samples each) */
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box; a box is splittable iff this is > 0 */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;	/* used to walk the box list allocated in select_colors */
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest color population */
+/* Returns NULL if no splittable boxes remain */
+{
+  boxptr boxp;
+  int i;
+  long maxc = 0;		/* best colorcount so far; starting at 0 skips empty boxes */
+  boxptr which = NULL;		/* stays NULL when no box qualifies */
+
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->colorcount > maxc && boxp->volume > 0) {	/* volume 0 => not splittable */
+      which = boxp;
+      maxc = boxp->colorcount;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest (scaled) volume */
+/* Returns NULL if no splittable boxes remain */
+{
+  boxptr boxp;
+  int i;
+  INT32 maxv = 0;		/* best volume so far; starting at 0 skips unsplittable boxes */
+  boxptr which = NULL;		/* stays NULL when every box has volume 0 */
+
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->volume > maxv) {
+      which = boxp;
+      maxv = boxp->volume;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
+/* and recompute its volume and population */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;		/* cursor over histogram cells */
+  int c0,c1,c2;			/* histogram-index loop counters */
+  int c0min,c0max,c1min,c1max,c2min,c2max;	/* working copy of the box bounds */
+  INT32 dist0,dist1,dist2;	/* scaled edge lengths used for the 2-norm */
+  long ccount;			/* number of nonzero cells found in the box */
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  if (c0max > c0min)		/* a one-index-wide axis cannot shrink */
+    for (c0 = c0min; c0 <= c0max; c0++)	/* scan upward for the first c0 plane with a nonzero cell */
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0min = c0min = c0;
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:			/* also reached, with bounds unchanged, if no nonzero cell was found */
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)	/* scan downward for the last c0 plane with a nonzero cell */
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0max = c0max = c0;
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)	/* same search along the c1 axis, using the tightened c0 range */
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1min = c1min = c1;
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1max = c1max = c1;
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)	/* c2 axis: the inner loop walks c1 at a fixed c2 */
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)	/* consecutive c1 rows are HIST_C2_ELEMS cells apart */
+	  if (*histp != 0) {
+	    boxp->c2min = c2min = c2;
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2max = c2max = c2;
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;	/* sum of squares; no square root is taken */
+  
+  /* Now scan remaining volume of box and compute population */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;		/* counts distinct nonzero cells, not pixels */
+	}
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Repeatedly select and split the largest box until we have enough boxes */
+{
+  int n,lb;			/* n = axis chosen for the split, lb = max index kept by the lower box */
+  int c0,c1,c2,cmax;		/* scaled axis lengths of the box being split */
+  boxptr b1,b2;			/* b1 = box being split, b2 = new box taking the upper part */
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* where new box will go */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
+    if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb+1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Update stats for boxes */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;		/* may be < desired_colors if splitting stopped early */
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Store in colormap[icolor] the pixel-weighted mean color of the box */
+{
+  /* Each nonzero histogram cell contributes its center sample value, */
+  /* weighted by the cell's pixel count (so the mean is over pixels, */
+  /* not over distinct colors). */
+  hist3d hist = ((my_cquantize_ptr) cinfo->cquantize)->histogram;
+  histptr cell;
+  int i0, i1, i2;
+  long center0, center1, center2;	/* cell-center sample values per axis */
+  long npix;				/* pixel count of the current cell */
+  long sum0 = 0, sum1 = 0, sum2 = 0;	/* weighted component sums */
+  long weight = 0;			/* total pixels in the box */
+
+  for (i0 = boxp->c0min; i0 <= boxp->c0max; i0++) {
+    center0 = (i0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1);
+    for (i1 = boxp->c1min; i1 <= boxp->c1max; i1++) {
+      center1 = (i1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1);
+      cell = & hist[i0][i1][boxp->c2min];
+      for (i2 = boxp->c2min; i2 <= boxp->c2max; i2++, cell++) {
+	npix = *cell;
+	if (npix == 0)
+	  continue;		/* unused cell: nothing to add */
+	center2 = (i2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1);
+	weight += npix;
+	sum0 += center0 * npix;
+	sum1 += center1 * npix;
+	sum2 += center2 * npix;
+      }
+    }
+  }
+
+  /* Adding half the divisor before dividing gives round-to-nearest. */
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((sum0 + (weight>>1)) / weight);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((sum1 + (weight>>1)) / weight);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((sum2 + (weight>>1)) / weight);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Drive color selection: build the box list, then fill the colormap */
+{
+  int nboxes, k;
+  boxptr first;
+  /* Room for one box per requested color, taken from the image pool */
+  boxptr boxes = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+
+  /* Start from a single box spanning the full histogram index range */
+  first = & boxes[0];
+  first->c0min = first->c1min = first->c2min = 0;
+  first->c0max = MAXJSAMPLE >> C0_SHIFT;
+  first->c1max = MAXJSAMPLE >> C1_SHIFT;
+  first->c2max = MAXJSAMPLE >> C2_SHIFT;
+  /* Tighten it around the cells actually in use and compute its stats */
+  update_box(cinfo, first);
+  /* Split boxes until enough exist or none can be split further */
+  nboxes = median_cut(cinfo, boxes, 1, desired_colors);
+  /* One colormap entry per resulting box */
+  k = 0;
+  while (k < nboxes) {
+    compute_color(cinfo, & boxes[k], k);
+    k++;
+  }
+  cinfo->actual_number_of_colors = nboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, nboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)	/* -3: an update box spans 1/8th of the axis */
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG) /* same, along the C1 axis */
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG) /* same, along the C2 axis */
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)	/* update box ID << this = sample-space origin of the box */
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[].
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;	/* center of the box's last cell along each axis */
+  int centerc0, centerc1, centerc2;	/* midpoint between min and max, rounded down */
+  int i, x, ncolors;		/* x = colormap component currently being examined */
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;	/* largest positive 32-bit value, so the first max_dist replaces it */
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {		/* below the box: min side is nearest, max side farthest */
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {	/* above the box: max side is nearest, min side farthest */
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {	/* in the lower half: the max side is the farther one */
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[1][i]);	/* same three cases for component 1, accumulated with += */
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[2][i]);	/* and again for component 2 */
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;	/* keep the smallest of the per-color maximum distances */
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;	/* colormap index is stored as a JSAMPLE */
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  INT32 * bptr;			/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  INT32 dist2;			/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  INT32 xx2;			/* distance increment for the innermost (c2) loop */
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;	/* largest positive 32-bit value: any candidate beats it */
+
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {	/* strict '<': on a tie the earlier candidate is kept */
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;
+	  xx2 += 2 * STEP_C2 * STEP_C2;
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Populate the inverse-colormap cache for the update box that encloses */
+/* histogram cell c0/c1/c2.  Only that one cell is strictly required; */
+/* the remaining cells of the box are filled in the same call. */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int box0, box1, box2;		/* update box ID along each axis */
+  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int i0, i1, i2;		/* loop counters over the cells of the box */
+  int ncand;			/* number of candidate colors */
+  register JSAMPLE * src;	/* walks through bestcolor[] */
+  register histptr dst;		/* walks through the main cache array */
+  /* Candidate colormap indexes, and the winning index for each cell. */
+  JSAMPLE colorlist[MAXNUMCOLORS];
+  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Reduce the cell coordinates to the ID of the enclosing update box */
+  box0 = c0 >> BOX_C0_LOG;
+  box1 = c1 >> BOX_C1_LOG;
+  box2 = c2 >> BOX_C2_LOG;
+
+  /* Locate the origin corner of the box in true color coordinates.
+   * More precisely, take the center of the corner histogram cell,
+   * because that is the lower bound of the volume we care about.
+   */
+  minc0 = (box0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (box1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (box2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+
+  /* Gather the colormap entries that are close enough to be the
+   * nearest entry for at least one cell of the update box.
+   */
+  ncand = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
+
+  /* Choose the actually nearest candidate for every cell. */
+  find_best_colors(cinfo, minc0, minc1, minc2, ncand, colorlist,
+		   bestcolor);
+
+  /* Store each winning color number, plus 1, in the main cache array */
+  c0 = box0 << BOX_C0_LOG;	/* base cell indexes of the update box */
+  c1 = box1 << BOX_C1_LOG;
+  c2 = box2 << BOX_C2_LOG;
+  src = bestcolor;
+  for (i0 = c0; i0 < c0 + BOX_C0_ELEMS; i0++) {
+    for (i1 = c1; i1 < c1 + BOX_C1_ELEMS; i1++) {
+      dst = & histogram[i0][i1][c2];
+      for (i2 = BOX_C2_ELEMS; i2 > 0; i2--) {
+	*dst++ = (histcell) (GETJSAMPLE(*src++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* Map rows to colormap indexes by direct cache lookup, without dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  JDIMENSION width = cinfo->output_width;
+  JDIMENSION remaining;		/* pixels left in the current row */
+  register JSAMPROW src, dst;
+  register histptr entry;	/* cache slot of the current pixel */
+  register int h0, h1, h2;	/* histogram cell coordinates */
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    src = input_buf[y];
+    dst = output_buf[y];
+    for (remaining = width; remaining > 0; remaining--) {
+      /* reduce the three samples of this pixel to cell coordinates */
+      h0 = GETJSAMPLE(src[0]) >> C0_SHIFT;
+      h1 = GETJSAMPLE(src[1]) >> C1_SHIFT;
+      h2 = GETJSAMPLE(src[2]) >> C2_SHIFT;
+      src += 3;
+      entry = & histogram[h0][h1][h2];
+      /* A zero entry is a cache miss: look up the nearest colormap entry */
+      if (*entry == 0)
+	fill_inverse_cmap(cinfo, h0,h1,h2);
+      /* Entries are stored as index+1, so remove the bias on output */
+      *dst++ = (JSAMPLE) (*entry - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering, reversing direction each row */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;		/* => cache entry for the adjusted pixel value */
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit; /* clamps pixel + error */
+  int *error_limit = cquantize->error_limiter; /* see init_error_limit */
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before (entry still zero), find */
+      /* nearest colormap entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell */
+      { register int pixcode = *cachep - 1; /* cache holds index + 1 */
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate the error_limiter table and fill in its three segments */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * limiter;
+  int err = 0, lim = 0;		/* input error magnitude, limited output */
+
+  limiter = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  limiter += MAXJSAMPLE;	/* center it: index range is -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = limiter;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Segment 1: errors map 1:1 up to +- MAXJSAMPLE/16 */
+  while (err < STEPSIZE) {
+    limiter[err] = lim; limiter[-err] = -lim;
+    err++; lim++;
+  }
+  /* Segment 2: errors map 1:2 up to +- 3*MAXJSAMPLE/16 (lim steps on even err) */
+  for (; err < STEPSIZE*3; err++, lim += (err & 1) ^ 1) {
+    limiter[err] = lim; limiter[-err] = -lim;
+  }
+  /* Segment 3: the rest is clamped to the final lim, (MAXJSAMPLE+1)/8 */
+  for (; err <= MAXJSAMPLE; err++) {
+    limiter[err] = lim; limiter[-err] = -lim;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr quantizer = (my_cquantize_ptr) cinfo->cquantize;
+  /* Install the saved colormap storage as cinfo->colormap, then have */
+  /* select_colors fill it with the desired number of representative colors */
+  cinfo->colormap = quantizer->sv_colormap;
+  select_colors(cinfo, quantizer->desired);
+  /* Make the next pass zero the color index table before using it */
+  quantizer->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work: this finish_pass hook for the non-prescan pass is intentionally empty */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int ncolors;
+  int slice;
+
+  /* This quantizer offers F-S dithering or no dithering, nothing else. */
+  /* A request for ordered dither is therefore turned into F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (! is_pre_scan) {
+    /* Output pass: choose the pixel-mapping method for the dither mode */
+    cquantize->pub.color_quantize =
+      (cinfo->dither_mode == JDITHER_FS) ? pass2_fs_dither : pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* The colormap must hold between 1 and MAXNUMCOLORS entries */
+    ncolors = cinfo->actual_number_of_colors;
+    if (ncolors < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (ncolors > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      /* Three FSERRORs per column, for output_width + 2 columns */
+      size_t arraysize = (size_t) ((cinfo->output_width + 2) *
+				   (3 * SIZEOF(FSERROR)));
+      /* Create the Floyd-Steinberg workspace unless it already exists, */
+      if (cquantize->fserrors == NULL)
+	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+      /* and start the pass with all propagated errors at zero. */
+      jzero_far((void FAR *) cquantize->fserrors, arraysize);
+      /* Likewise create the error-limit table unless it already exists. */
+      if (cquantize->error_limiter == NULL)
+	init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;
+    }
+  } else {
+    /* Prescan pass: install the prescan method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  }
+
+  /* Clear the histogram or inverse color map when it is flagged for zeroing */
+  if (cquantize->needs_zeroed) {
+    for (slice = 0; slice < HIST_C0_ELEMS; slice++) {
+      jzero_far((void FAR *) histogram[slice],
+		HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+/* Invoked on a switch to a new external colormap */
+{
+  /* Flag the inverse color map for zeroing; start_pass_2_quant */
+  /* performs the actual clearing when it sees the flag. */
+  ((my_cquantize_ptr) cinfo->cquantize)->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;			/* index over the HIST_C0_ELEMS histogram slices */
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap: a pointer table, then one slice per C0 */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3);
+    cquantize->desired = desired; /* read back by finish_pass1 */
+  } else
+    cquantize->sv_colormap = NULL; /* 2-pass quantization not enabled */
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it is FAR storage.
+   * Although we will cope with a later change in dither_mode,
+   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/JPEG/jutils.cpp b/JPEG/jutils.cpp
new file mode 100644
index 0000000..d18a955
--- /dev/null
+++ b/JPEG/jutils.cpp
@@ -0,0 +1,179 @@
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* 16 extra entries for safety in decoder: */
+ 63, 63, 63, 63, 63, 63, 63, 63  /* positions past the block end map to 63 */
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Return ceil(a/b); callers must guarantee a >= 0 and b > 0 */
+{
+  long biased = a + b - 1L;	/* any nonzero remainder now reaches the next quotient */
+  return biased / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Return the smallest multiple of b that is >= a, ie, ceil(a/b)*b */
+/* Callers must guarantee a >= 0 and b > 0 */
+{
+  long biased = a + b - 1L;
+  return biased - (biased % b);
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macros */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#endif /* USE_FMEM; when unset, both macros stay undefined and the #else loops below are compiled */
+#endif /* NEED_FAR_POINTERS */
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+		   JSAMPARRAY output_array, int dest_row,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy num_rows rows of num_cols samples each, reading from
+ * input_array[source_row] onward and writing to output_array[dest_row]
+ * onward; the two areas may overlap for duplication.
+ * Every row involved must hold at least num_cols samples.
+ */
+{
+  JSAMPARRAY src_rows = input_array + source_row;
+  JSAMPARRAY dst_rows = output_array + dest_row;
+  register JSAMPROW from, to;
+#ifdef FMEMCOPY
+  register size_t row_bytes = (size_t) (num_cols * SIZEOF(JSAMPLE));
+#else
+  register JDIMENSION left;	/* samples still to copy in this row */
+#endif
+  int r;
+
+  for (r = 0; r < num_rows; r++) {
+    from = src_rows[r];
+    to = dst_rows[r];
+#ifdef FMEMCOPY
+    FMEMCOPY(to, from, row_bytes);
+#else
+    /* sample-by-sample fallback; GETJSAMPLE() is not needed for a plain copy */
+    for (left = num_cols; left > 0; left--)
+      *to++ = *from++;
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Duplicate num_blocks coefficient blocks from input_row into output_row. */
+{
+#ifdef FMEMCOPY
+  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#else
+  register JCOEFPTR src = (JCOEFPTR) input_row;
+  register JCOEFPTR dst = (JCOEFPTR) output_row;
+  register long remaining = (long) num_blocks * DCTSIZE2;
+
+  while (remaining > 0) {
+    *dst++ = *src++;
+    remaining--;
+  }
+#endif
+}
+
+
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Clear bytestozero bytes of FAR memory starting at target. */
+/* The region may be sample-array, block-array, or alloc_large data. */
+{
+#ifdef FMEMZERO
+  FMEMZERO(target, bytestozero);
+#else
+  register char FAR * cursor = (char FAR *) target;
+  register size_t remaining = bytestozero;
+
+  while (remaining-- > 0) {
+    *cursor++ = 0;
+  }
+#endif
+}
diff --git a/JPEG/jversion.h b/JPEG/jversion.h
new file mode 100644
index 0000000..6472c58
--- /dev/null
+++ b/JPEG/jversion.h
@@ -0,0 +1,14 @@
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"6b  27-Mar-1998"	/* software version identification string */
+
+#define JCOPYRIGHT	"Copyright (C) 1998, Thomas G. Lane"	/* copyright notice string */
diff --git a/Makefile b/Makefile
index f71afaf..131474e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,71 +1,155 @@
 PR_TARGET=PoissonRecon
 SR_TARGET=SSDRecon
 ST_TARGET=SurfaceTrimmer
-PR_SOURCE=CmdLineParser.cpp Factor.cpp Geometry.cpp MarchingCubes.cpp PlyFile.cpp PoissonRecon.cpp
-SR_SOURCE=CmdLineParser.cpp Factor.cpp Geometry.cpp MarchingCubes.cpp PlyFile.cpp SSDRecon.cpp
-ST_SOURCE=CmdLineParser.cpp Factor.cpp Geometry.cpp MarchingCubes.cpp PlyFile.cpp SurfaceTrimmer.cpp
-
-CFLAGS += -fopenmp -Wno-deprecated -Wno-write-strings -std=c++11
-LFLAGS += -lgomp -lstdc++
+EH_TARGET=EDTInHeat
+IS_TARGET=ImageStitching
+AV_TARGET=AdaptiveTreeVisualization
+PR_SOURCE=PlyFile.cpp PoissonRecon.cpp
+SR_SOURCE=PlyFile.cpp SSDRecon.cpp
+ST_SOURCE=PlyFile.cpp SurfaceTrimmer.cpp
+EH_SOURCE=PlyFile.cpp EDTInHeat.cpp
+IS_SOURCE=ImageStitching.cpp
+AV_SOURCE=PlyFile.cpp AdaptiveTreeVisualization.cpp
+
+#COMPILER = gcc
+COMPILER = clang
+
+ifeq ($(COMPILER),gcc)
+	CFLAGS += -fopenmp -Wno-deprecated -std=c++11 -Wno-invalid-offsetof
+	LFLAGS += -lgomp -lstdc++
+else
+# 	CFLAGS += -fopenmp=libiomp5 -Wno-deprecated -Wno-write-strings -std=c++11 -Wno-invalid-offsetof
+# 	LFLAGS += -liomp5 -lstdc++
+	CFLAGS += -Wno-deprecated -std=c++11 -Wno-invalid-offsetof
+	LFLAGS += -lstdc++
+endif
+#LFLAGS += -lz -lpng -ljpeg
 
 CFLAGS_DEBUG = -DDEBUG -g3
 LFLAGS_DEBUG =
 
-CFLAGS_RELEASE = -O3 -DRELEASE -funroll-loops -ffast-math
-LFLAGS_RELEASE = -O3 
+CFLAGS_RELEASE = -O3 -DRELEASE -funroll-loops -ffast-math -g
+LFLAGS_RELEASE = -O3 -g
 
 SRC = Src/
 BIN = Bin/Linux/
-INCLUDE = /usr/include/
+#INCLUDE = /usr/include/
+INCLUDE = .
+
+ifeq ($(COMPILER),gcc)
+	CC=gcc
+	CXX=g++
+else
+	CC=clang
+	CXX=clang++
+#	CC=clang-3.5
+#	CXX=clang++-3.5
+endif
 
-CC=gcc
-CXX=g++
 MD=mkdir
 
 PR_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(PR_SOURCE))))
 SR_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(SR_SOURCE))))
 ST_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(ST_SOURCE))))
+EH_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(EH_SOURCE))))
+IS_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(IS_SOURCE))))
+AV_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(AV_SOURCE))))
 
 
-all: CFLAGS += $(CFLAGS_DEBUG)
-all: LFLAGS += $(LFLAGS_DEBUG)
-all: $(BIN)
+all: CFLAGS += $(CFLAGS_RELEASE)
+all: LFLAGS += $(LFLAGS_RELEASE)
+all: make_dir
 all: $(BIN)$(PR_TARGET)
 all: $(BIN)$(SR_TARGET)
 all: $(BIN)$(ST_TARGET)
-
-release: CFLAGS += $(CFLAGS_RELEASE)
-release: LFLAGS += $(LFLAGS_RELEASE)
-release: $(BIN)
-release: $(BIN)$(PR_TARGET)
-release: $(BIN)$(SR_TARGET)
-release: $(BIN)$(ST_TARGET)
+all: $(BIN)$(EH_TARGET)
+all: $(BIN)$(IS_TARGET)
+all: $(BIN)$(AV_TARGET)
+
+debug: CFLAGS += $(CFLAGS_DEBUG)
+debug: LFLAGS += $(LFLAGS_DEBUG)
+debug: make_dir
+debug: $(BIN)$(PR_TARGET)
+debug: $(BIN)$(SR_TARGET)
+debug: $(BIN)$(ST_TARGET)
+debug: $(BIN)$(EH_TARGET)
+debug: $(BIN)$(IS_TARGET)
+debug: $(BIN)$(AV_TARGET)
+
+poissonrecon: CFLAGS += $(CFLAGS_RELEASE)
+poissonrecon: LFLAGS += $(LFLAGS_RELEASE)
+poissonrecon: make_dir
+poissonrecon: $(BIN)$(PR_TARGET)
+
+ssdrecon: CFLAGS += $(CFLAGS_RELEASE)
+ssdrecon: LFLAGS += $(LFLAGS_RELEASE)
+ssdrecon: make_dir
+ssdrecon: $(BIN)$(SR_TARGET)
+
+surfacetrimmer: CFLAGS += $(CFLAGS_RELEASE)
+surfacetrimmer: LFLAGS += $(LFLAGS_RELEASE)
+surfacetrimmer: make_dir
+surfacetrimmer: $(BIN)$(ST_TARGET)
+
+edtinheat: CFLAGS += $(CFLAGS_RELEASE)
+edtinheat: LFLAGS += $(LFLAGS_RELEASE)
+edtinheat: make_dir
+edtinheat: $(BIN)$(EH_TARGET)
+
+imagestitching: CFLAGS += $(CFLAGS_RELEASE)
+imagestitching: LFLAGS += $(LFLAGS_RELEASE)
+imagestitching: make_dir
+imagestitching: $(BIN)$(IS_TARGET)
+
+octreevisualization: CFLAGS += $(CFLAGS_RELEASE)
+octreevisualization: LFLAGS += $(LFLAGS_RELEASE)
+octreevisualization: make_dir
+octreevisualization: $(BIN)$(AV_TARGET)
 
 clean:
-	rm -f $(BIN)$(PR_TARGET)
-	rm -f $(BIN)$(SR_TARGET)
-	rm -f $(BIN)$(ST_TARGET)
-	rm -f $(PR_OBJECTS)
-	rm -f $(SR_OBJECTS)
-	rm -f $(ST_OBJECTS)
-
-$(BIN):
+	rm -rf $(BIN)$(PR_TARGET)
+	rm -rf $(BIN)$(SR_TARGET)
+	rm -rf $(BIN)$(ST_TARGET)
+	rm -rf $(BIN)$(EH_TARGET)
+	rm -rf $(BIN)$(IS_TARGET)
+	rm -rf $(BIN)$(AV_TARGET)
+	rm -rf $(PR_OBJECTS)
+	rm -rf $(SR_OBJECTS)
+	rm -rf $(ST_OBJECTS)
+	rm -rf $(EH_OBJECTS)
+	rm -rf $(IS_OBJECTS)
+	rm -rf $(AV_OBJECTS)
+	cd PNG  && $(MAKE) clean
+
+
+make_dir:
 	$(MD) -p $(BIN)
 
 $(BIN)$(PR_TARGET): $(PR_OBJECTS)
-	$(CXX) -o $@ $(PR_OBJECTS) $(LFLAGS)
+	cd PNG  && $(MAKE)
+	$(CXX) -o $@ $(PR_OBJECTS) -L$(BIN) $(LFLAGS) -ljpeg -lmypng -lz
 
 $(BIN)$(SR_TARGET): $(SR_OBJECTS)
-	$(CXX) -o $@ $(SR_OBJECTS) $(LFLAGS)
+	cd PNG  && $(MAKE)
+	$(CXX) -o $@ $(SR_OBJECTS) -L$(BIN) $(LFLAGS) -ljpeg -lmypng -lz
 
 $(BIN)$(ST_TARGET): $(ST_OBJECTS)
 	$(CXX) -o $@ $(ST_OBJECTS) $(LFLAGS)
 
+$(BIN)$(EH_TARGET): $(EH_OBJECTS)
+	$(CXX) -o $@ $(EH_OBJECTS) $(LFLAGS)
+
+$(BIN)$(IS_TARGET): $(IS_OBJECTS)
+	cd PNG  && $(MAKE)
+	$(CXX) -o $@ $(IS_OBJECTS) -L$(BIN) $(LFLAGS) -ljpeg -lmypng -lz
+
+$(BIN)$(AV_TARGET): $(AV_OBJECTS)
+	cd PNG  && $(MAKE)
+	$(CXX) -o $@ $(AV_OBJECTS) -L$(BIN) $(LFLAGS) -ljpeg -lmypng -lz
+
 $(BIN)%.o: $(SRC)%.c
-	mkdir -p $(BIN)
-	$(CC) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
+	$(CC) -c -o $@ -I$(INCLUDE) $<
 
 $(BIN)%.o: $(SRC)%.cpp
-	mkdir -p $(BIN)
 	$(CXX) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
 
diff --git a/PNG.vcxproj b/PNG.vcxproj
new file mode 100644
index 0000000..590e080
--- /dev/null
+++ b/PNG.vcxproj
@@ -0,0 +1,174 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="PNG\png.c" />
+    <ClCompile Include="PNG\pngerror.c" />
+    <ClCompile Include="PNG\pnggccrd.c" />
+    <ClCompile Include="PNG\pngget.c" />
+    <ClCompile Include="PNG\pngmem.c" />
+    <ClCompile Include="PNG\pngpread.c" />
+    <ClCompile Include="PNG\pngread.c" />
+    <ClCompile Include="PNG\pngrio.c" />
+    <ClCompile Include="PNG\pngrtran.c" />
+    <ClCompile Include="PNG\pngrutil.c" />
+    <ClCompile Include="PNG\pngset.c" />
+    <ClCompile Include="PNG\pngtest.c" />
+    <ClCompile Include="PNG\pngtrans.c" />
+    <ClCompile Include="PNG\pngvcrd.c" />
+    <ClCompile Include="PNG\pngwio.c" />
+    <ClCompile Include="PNG\pngwrite.c" />
+    <ClCompile Include="PNG\pngwtran.c" />
+    <ClCompile Include="PNG\pngwutil.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="PNG\png.h" />
+    <ClInclude Include="PNG\pngasmrd.h" />
+    <ClInclude Include="PNG\pngconf.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{B5899B32-FAC2-477E-99AA-86736B97F2FC}</ProjectGuid>
+    <RootNamespace>PNG</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)Intermediate\$(ProjectName)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>TurnOffAllWarnings</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/PNG/Makefile b/PNG/Makefile
new file mode 100644
index 0000000..62b0426
--- /dev/null
+++ b/PNG/Makefile
@@ -0,0 +1,64 @@
+PNG_TARGET=libmypng.a
+PNG_SOURCE=png.c pngerror.c pnggccrd.c pngget.c pngmem.c pngpread.c pngread.c pngrio.c pngrtran.c pngrutil.c pngset.c pngtest.c pngtrans.c pngvcrd.c pngwio.c pngwrite.c pngwtran.c pngwutil.c
+
+COMPILER = gcc
+#COMPILER = clang
+
+CFLAGS += -Wno-deprecated -Wno-write-strings
+
+CFLAGS_DEBUG = -DDEBUG -g3
+LFLAGS_DEBUG =
+
+CFLAGS_RELEASE = -O3 -DRELEASE -funroll-loops -ffast-math -g
+LFLAGS_RELEASE = -O3 -g
+
+SRC = ./
+BIN = ../Bin/Linux/
+INCLUDE = ../
+
+ifeq ($(COMPILER),gcc)
+	CC=gcc
+	CXX=g++
+else
+	CC=clang-3.8
+	CXX=clang++-3.8
+#	CC=clang-3.5
+#	CXX=clang++-3.5
+endif
+
+MD=mkdir
+
+PNG_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(PNG_SOURCE))))
+
+.PHONY: all debug png clean make_dir
+all: CFLAGS += $(CFLAGS_RELEASE)
+all: LFLAGS += $(LFLAGS_RELEASE)
+all: make_dir
+all: $(BIN)$(PNG_TARGET)
+
+debug: CFLAGS += $(CFLAGS_DEBUG)
+debug: LFLAGS += $(LFLAGS_DEBUG)
+debug: make_dir
+debug: $(BIN)$(PNG_TARGET)
+
+png: CFLAGS += $(CFLAGS_RELEASE)
+png: LFLAGS += $(LFLAGS_RELEASE)
+png: make_dir
+png: $(BIN)$(PNG_TARGET)
+
+clean:
+	rm -rf $(BIN)$(PNG_TARGET)
+	rm -rf $(PNG_OBJECTS)
+
+make_dir:
+	$(MD) -p $(BIN)
+
+$(BIN)$(PNG_TARGET): $(PNG_OBJECTS)
+	ar rcs $@ $^
+
+$(BIN)%.o: $(SRC)%.c
+	$(CC) -c -o $@ -I$(INCLUDE) $<
+
+$(BIN)%.o: $(SRC)%.cpp
+	$(CXX) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
+
diff --git a/PNG/png.c b/PNG/png.c
new file mode 100644
index 0000000..63ae86e
--- /dev/null
+++ b/PNG/png.c
@@ -0,0 +1,798 @@
+
+/* png.c - location for general purpose libpng functions
+ *
+ * Last changed in libpng 1.2.21 October 4, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#define PNG_NO_EXTERN
+#include "png.h"
+
+/* Generate a compiler error if there is an old png.h in the search path. */
+typedef version_1_2_29 Your_png_h_is_not_version_1_2_29;
+
+/* Version information for C files.  This had better match the version
+ * string defined in png.h.  */
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+/* png_libpng_ver was changed to a function in version 1.0.5c */
+PNG_CONST char png_libpng_ver[18] = PNG_LIBPNG_VER_STRING;
+
+#ifdef PNG_READ_SUPPORTED
+
+/* png_sig was changed to a function in version 1.0.5c */
+/* Place to hold the signature string for a PNG file. */
+PNG_CONST png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+#endif /* PNG_READ_SUPPORTED */
+
+/* Invoke global declarations for constant strings for known chunk types */
+PNG_IHDR;
+PNG_IDAT;
+PNG_IEND;
+PNG_PLTE;
+PNG_bKGD;
+PNG_cHRM;
+PNG_gAMA;
+PNG_hIST;
+PNG_iCCP;
+PNG_iTXt;
+PNG_oFFs;
+PNG_pCAL;
+PNG_sCAL;
+PNG_pHYs;
+PNG_sBIT;
+PNG_sPLT;
+PNG_sRGB;
+PNG_tEXt;
+PNG_tIME;
+PNG_tRNS;
+PNG_zTXt;
+
+#ifdef PNG_READ_SUPPORTED
+/* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+/* start of interlace block */
+PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+
+/* offset to next interlace block */
+PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+
+/* start of interlace block in the y direction */
+PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+
+/* offset to next interlace block in the y direction */
+PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+
+/* Height of interlace block.  This is not currently used - if you need
+ * it, uncomment it here and in png.h
+PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+*/
+
+/* Mask to determine which pixels are valid in a pass */
+PNG_CONST int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+
+/* Mask to determine which pixels to overwrite while displaying */
+PNG_CONST int FARDATA png_pass_dsp_mask[]
+   = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
+
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+/* Tells libpng that we have already handled the first "num_bytes" bytes
+ * of the PNG file signature.  If the PNG data is embedded into another
+ * stream we can set num_bytes = 8 so that libpng will not attempt to read
+ * or write any of the magic bytes before it starts on the IHDR.
+ */
+
+#ifdef PNG_READ_SUPPORTED
+void PNGAPI
+png_set_sig_bytes(png_structp png_ptr, int num_bytes)
+{
+   if (png_ptr == NULL)
+      return;
+   png_debug(1, "in png_set_sig_bytes\n");
+   if (num_bytes > 8)   /* the PNG signature is only 8 bytes long */
+      png_error(png_ptr, "Too many bytes for PNG signature.");
+   png_ptr->sig_bytes = (png_byte)((num_bytes > 0) ? num_bytes : 0);
+}
+
+/* Checks whether the supplied bytes match the PNG signature.  We allow
+ * checking less than the full 8-byte signature so that those apps that
+ * already read the first few bytes of a file to determine the file type
+ * can simply check the remaining bytes for extra assurance.  Returns
+ * an integer less than, equal to, or greater than zero if sig is found,
+ * respectively, to be less than, to match, or be greater than the correct
+ * PNG signature (this is the same behaviour as strcmp, memcmp, etc).
+ */
+int PNGAPI
+png_sig_cmp(png_bytep sig, png_size_t start, png_size_t num_to_check)
+{
+   /* The eight magic bytes that open every PNG datastream. */
+   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+   png_size_t count = num_to_check;
+
+   /* Nothing to compare, or the offset lies beyond the signature. */
+   if (count < 1 || start > 7)
+      return (-1);
+   /* Clamp the request so it never reads past byte 8. */
+   if (count > 8)
+      count = 8;
+   if (start + count > 8)
+      count = 8 - start;
+   return ((int)(png_memcmp(&sig[start], &png_signature[start], count)));
+}
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* (Obsolete) function to check signature bytes.  It does not allow one
+ * to check a partial signature.  This function might be removed in the
+ * future - use png_sig_cmp().  Returns true (nonzero) if the file is PNG.
+ */
+int PNGAPI
+png_check_sig(png_bytep sig, int num)
+{
+  return (png_sig_cmp(sig, (png_size_t)0, (png_size_t)num) == 0);
+}
+#endif
+#endif /* PNG_READ_SUPPORTED */
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Allocate memory for zlib (zeroed only if PNG_1_0_X && !PNG_NO_ZALLOC_ZERO). */
+#ifdef PNG_1_0_X
+voidpf PNGAPI
+#else
+voidpf /* private */
+#endif
+png_zalloc(voidpf png_ptr, uInt items, uInt size)
+{
+   png_voidp ptr;
+   png_structp p=(png_structp)png_ptr;
+   png_uint_32 save_flags;   /* loaded only after the NULL check below */
+   png_uint_32 num_bytes;
+
+   if(png_ptr == NULL) return (NULL);
+   if (size != 0 && items > PNG_UINT_32_MAX/size)   /* never divide by 0 */
+   {
+     png_warning (p, "Potential overflow in png_zalloc()");
+     return (NULL);
+   }
+   num_bytes = (png_uint_32)items * size;
+   save_flags = p->flags;   /* restored right after the allocation */
+   p->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK;
+   ptr = (png_voidp)png_malloc((png_structp)png_ptr, num_bytes);
+   p->flags=save_flags;
+
+#if defined(PNG_1_0_X) && !defined(PNG_NO_ZALLOC_ZERO)
+   if (ptr == NULL)
+       return ((voidpf)ptr);
+
+   if (num_bytes > (png_uint_32)0x8000L)
+   {
+      png_memset(ptr, 0, (png_size_t)0x8000L);
+      png_memset((png_bytep)ptr + (png_size_t)0x8000L, 0,
+         (png_size_t)(num_bytes - (png_uint_32)0x8000L));
+   }
+   else
+   {
+      png_memset(ptr, 0, (png_size_t)num_bytes);
+   }
+#endif
+   return ((voidpf)ptr);
+}
+
+/* function to free memory for zlib; it simply forwards to png_free() */
+#ifdef PNG_1_0_X
+void PNGAPI
+#else
+void /* private */
+#endif
+png_zfree(voidpf png_ptr, voidpf ptr)
+{
+   png_free((png_structp)png_ptr, (png_voidp)ptr);
+}
+
+/* Reset the running CRC to the start value crc32() yields for a Z_NULL,
+ * zero-length buffer.  If the CRC field is wider than 32 bits, the top
+ * bits must remain 0. */
+void /* PRIVATE */
+png_reset_crc(png_structp png_ptr)
+{
+   png_ptr->crc = crc32(0, Z_NULL, 0);
+}
+
+/* Calculate the CRC over a section of data.  We can only pass as
+ * much data to this routine as the largest single buffer size.  We
+ * also check that this data will actually be used before going to the
+ * trouble of calculating it.
+ */
+void /* PRIVATE */
+png_calculate_crc(png_structp png_ptr, png_bytep ptr, png_size_t length)
+{
+   int skip_crc;
+
+   if (png_ptr->chunk_name[0] & 0x20)
+   {
+      /* ancillary chunk: skipped only when set to "use, no warning" */
+      skip_crc = ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN));
+   }
+   else
+   {
+      /* critical chunk: skipped only when told to ignore its CRC */
+      skip_crc = ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0);
+   }
+   if (skip_crc)
+      return;
+   png_ptr->crc = crc32(png_ptr->crc, ptr, (uInt)length);
+}
+
+/* Allocate the memory for an info_struct for the application.  We don't
+ * really need the png_ptr, but it could potentially be useful in the
+ * future.  This should be used in favour of malloc(png_sizeof(png_info))
+ * and png_info_init() so that applications that want to use a shared
+ * libpng don't have to be recompiled if png_info changes size.
+ */
+png_infop PNGAPI
+png_create_info_struct(png_structp png_ptr)
+{
+   png_infop info_ptr;
+   /* Returns NULL when png_ptr is NULL or no struct could be created. */
+   png_debug(1, "in png_create_info_struct\n");
+   if(png_ptr == NULL) return (NULL);
+#ifdef PNG_USER_MEM_SUPPORTED
+   info_ptr = (png_infop)png_create_struct_2(PNG_STRUCT_INFO,
+      png_ptr->malloc_fn, png_ptr->mem_ptr);
+#else
+   info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO);
+#endif
+   if (info_ptr != NULL)
+      png_info_init_3(&info_ptr, png_sizeof(png_info));
+   /* passed by address above: png_info_init_3 may swap in a new struct */
+   return (info_ptr);
+}
+
+/* This function frees the memory associated with a single info struct.
+ * Normally, one would use either png_destroy_read_struct() or
+ * png_destroy_write_struct() to free an info struct, but this may be
+ * useful for some applications.
+ */
+void PNGAPI
+png_destroy_info_struct(png_structp png_ptr, png_infopp info_ptr_ptr)
+{
+   png_infop doomed;
+
+   if (png_ptr == NULL)
+      return;
+   png_debug(1, "in png_destroy_info_struct\n");
+   /* Nothing to do without a holder or with an already-empty holder. */
+   if (info_ptr_ptr == NULL || *info_ptr_ptr == NULL)
+      return;
+   doomed = *info_ptr_ptr;
+
+   /* Release attached data first, then the struct itself. */
+   png_info_destroy(png_ptr, doomed);
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2((png_voidp)doomed, png_ptr->free_fn,
+       png_ptr->mem_ptr);
+#else
+   png_destroy_struct((png_voidp)doomed);
+#endif
+   *info_ptr_ptr = NULL;
+}
+
+/* Initialize the info structure.  This is now an internal function (0.89)
+ * and applications using it are urged to use png_create_info_struct()
+ * instead.
+ */
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+#undef png_info_init
+void PNGAPI
+png_info_init(png_infop info_ptr)
+{
+   /* We only come here via pre-1.0.12-compiled applications */
+   png_info_init_3(&info_ptr, 0);  /* NOTE(review): size 0 makes the callee rebuild the struct; the new pointer stays local here */
+}
+#endif
+
+void PNGAPI
+png_info_init_3(png_infopp ptr_ptr, png_size_t png_info_struct_size)
+{
+   png_infop info_ptr = *ptr_ptr;
+
+   if(info_ptr == NULL) return;
+   png_debug(1, "in png_info_init_3\n");
+
+   /* caller's struct is too small for this libpng: swap in a fresh one */
+   if(png_sizeof(png_info) > png_info_struct_size)
+     {
+       png_destroy_struct(info_ptr);
+       info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO);
+       *ptr_ptr = info_ptr;
+       if(info_ptr == NULL) return;  /* allocation failed: nothing to clear */
+     }
+   /* set everything to 0 */
+   png_memset(info_ptr, 0, png_sizeof (png_info));
+}
+
+#ifdef PNG_FREE_ME_SUPPORTED
+void PNGAPI
+png_data_freer(png_structp png_ptr, png_infop info_ptr,
+   int freer, png_uint_32 mask)
+{
+   png_debug(1, "in png_data_freer\n");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   switch (freer)
+   {
+      case PNG_DESTROY_WILL_FREE_DATA: info_ptr->free_me |= mask; break;
+      case PNG_USER_WILL_FREE_DATA: info_ptr->free_me &= ~mask; break;
+      default:
+         png_warning(png_ptr, "Unknown freer parameter in png_data_freer.");
+   }
+}
+#endif
+/* Free the info_ptr data selected by mask; num is an entry index or -1. */
+void PNGAPI
+png_free_data(png_structp png_ptr, png_infop info_ptr, png_uint_32 mask,
+   int num)
+{
+   png_debug(1, "in png_free_data\n");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* num picks one entry of a multi-entry kind; -1 means every entry */
+#if defined(PNG_TEXT_SUPPORTED)
+/* free text item num or (if num == -1) all text items */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_TEXT) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_TEXT)
+#endif
+{
+   if (num != -1)
+   {
+     if (info_ptr->text && info_ptr->text[num].key)
+     {
+         png_free(png_ptr, info_ptr->text[num].key);
+         info_ptr->text[num].key = NULL;
+     }
+   }
+   else
+   {
+       int i;
+       for (i = 0; i < info_ptr->num_text; i++)
+           png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, i);
+       png_free(png_ptr, info_ptr->text);
+       info_ptr->text = NULL;
+       info_ptr->num_text=0;
+   }
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* free any tRNS entry */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_TRNS) & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_TRNS) && (png_ptr->flags & PNG_FLAG_FREE_TRNS))
+#endif
+{
+    png_free(png_ptr, info_ptr->trans);
+    info_ptr->valid &= ~PNG_INFO_tRNS;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_TRNS;
+#endif
+    info_ptr->trans = NULL;
+}
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+/* free any sCAL entry */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_SCAL) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_SCAL)
+#endif
+{
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+    png_free(png_ptr, info_ptr->scal_s_width);
+    png_free(png_ptr, info_ptr->scal_s_height);
+    info_ptr->scal_s_width = NULL;
+    info_ptr->scal_s_height = NULL;
+#endif
+    info_ptr->valid &= ~PNG_INFO_sCAL;
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* free any pCAL entry */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_PCAL) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_PCAL)
+#endif
+{
+    png_free(png_ptr, info_ptr->pcal_purpose);
+    png_free(png_ptr, info_ptr->pcal_units);
+    info_ptr->pcal_purpose = NULL;
+    info_ptr->pcal_units = NULL;
+    if (info_ptr->pcal_params != NULL)
+    {
+        int i;
+        for (i = 0; i < (int)info_ptr->pcal_nparams; i++)
+        {
+          png_free(png_ptr, info_ptr->pcal_params[i]);
+          info_ptr->pcal_params[i]=NULL;
+        }
+        png_free(png_ptr, info_ptr->pcal_params);
+        info_ptr->pcal_params = NULL;
+    }
+    info_ptr->valid &= ~PNG_INFO_pCAL;
+}
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+/* free any iCCP entry */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_ICCP) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_ICCP)
+#endif
+{
+    png_free(png_ptr, info_ptr->iccp_name);
+    png_free(png_ptr, info_ptr->iccp_profile);
+    info_ptr->iccp_name = NULL;
+    info_ptr->iccp_profile = NULL;
+    info_ptr->valid &= ~PNG_INFO_iCCP;
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* free a given sPLT entry, or (if num == -1) all sPLT entries */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_SPLT) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_SPLT)
+#endif
+{
+   if (num != -1)
+   {
+      if(info_ptr->splt_palettes)
+      {
+          png_free(png_ptr, info_ptr->splt_palettes[num].name);
+          png_free(png_ptr, info_ptr->splt_palettes[num].entries);
+          info_ptr->splt_palettes[num].name = NULL;
+          info_ptr->splt_palettes[num].entries = NULL;
+      }
+   }
+   else
+   {
+       if(info_ptr->splt_palettes_num)
+       {
+         int i;
+         for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+            png_free_data(png_ptr, info_ptr, PNG_FREE_SPLT, i);
+
+         png_free(png_ptr, info_ptr->splt_palettes);
+         info_ptr->splt_palettes = NULL;
+         info_ptr->splt_palettes_num = 0;
+       }
+       info_ptr->valid &= ~PNG_INFO_sPLT;
+   }
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+  if(png_ptr->unknown_chunk.data)  /* released whatever mask says */
+  {
+    png_free(png_ptr, png_ptr->unknown_chunk.data);
+    png_ptr->unknown_chunk.data = NULL;
+  }
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_UNKN) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_UNKN)
+#endif
+{
+   if (num != -1)
+   {
+       if(info_ptr->unknown_chunks)
+       {
+          png_free(png_ptr, info_ptr->unknown_chunks[num].data);
+          info_ptr->unknown_chunks[num].data = NULL;
+       }
+   }
+   else
+   {
+       int i;
+
+       if(info_ptr->unknown_chunks_num)
+       {
+         for (i = 0; i < (int)info_ptr->unknown_chunks_num; i++)
+            png_free_data(png_ptr, info_ptr, PNG_FREE_UNKN, i);
+
+         png_free(png_ptr, info_ptr->unknown_chunks);
+         info_ptr->unknown_chunks = NULL;
+         info_ptr->unknown_chunks_num = 0;
+       }
+   }
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* free any hIST entry */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_HIST)  & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_HIST) && (png_ptr->flags & PNG_FLAG_FREE_HIST))
+#endif
+{
+    png_free(png_ptr, info_ptr->hist);
+    info_ptr->hist = NULL;
+    info_ptr->valid &= ~PNG_INFO_hIST;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_HIST;
+#endif
+}
+#endif
+
+/* free any PLTE entry that was internally allocated */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_PLTE) & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_PLTE) && (png_ptr->flags & PNG_FLAG_FREE_PLTE))
+#endif
+{
+    png_zfree(png_ptr, info_ptr->palette);
+    info_ptr->palette = NULL;
+    info_ptr->valid &= ~PNG_INFO_PLTE;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_PLTE;
+#endif
+    info_ptr->num_palette = 0;
+}
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* free any image bits attached to the info structure */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_ROWS) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_ROWS)
+#endif
+{
+    if(info_ptr->row_pointers)
+    {
+       int row;
+       for (row = 0; row < (int)info_ptr->height; row++)
+       {
+          png_free(png_ptr, info_ptr->row_pointers[row]);
+          info_ptr->row_pointers[row]=NULL;
+       }
+       png_free(png_ptr, info_ptr->row_pointers);
+       info_ptr->row_pointers=NULL;
+    }
+    info_ptr->valid &= ~PNG_INFO_IDAT;
+}
+#endif
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   if(num == -1)
+     info_ptr->free_me &= ~mask;
+   else
+     info_ptr->free_me &= ~(mask & ~PNG_FREE_MUL);  /* NOTE(review): PNG_FREE_MUL bits stay set, presumably the multi-entry kinds -- confirm in png.h */
+#endif
+}
+
+/* This is an internal routine to free any memory that the info struct is
+ * pointing to before re-using it or freeing the struct itself.  Recall
+ * that png_free() checks for NULL pointers for us.
+ */
+void /* PRIVATE */
+png_info_destroy(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_info_destroy\n");
+   /* num == -1 asks png_free_data for every entry, not a single index */
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->num_chunk_list)
+   {
+       png_free(png_ptr, png_ptr->chunk_list);
+       png_ptr->chunk_list=NULL;
+       png_ptr->num_chunk_list=0;
+   }
+#endif
+   /* re-initialise the emptied struct (png_info_init_3 zeroes it) */
+   png_info_init_3(&info_ptr, png_sizeof(png_info));
+}
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+/* This function returns a pointer to the io_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy() or png_read_destroy() are called.
+ */
+png_voidp PNGAPI
+png_get_io_ptr(png_structp png_ptr)
+{
+   /* a NULL handle has no io_ptr to report */
+   return ((png_ptr != NULL) ? png_ptr->io_ptr : NULL);
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#if !defined(PNG_NO_STDIO)
+/* Initialize the default input/output functions for the PNG file.  If you
+ * use your own read or write routines, you can call either png_set_read_fn()
+ * or png_set_write_fn() instead of png_init_io().  If you have defined
+ * PNG_NO_STDIO, you must use a function of your own because "FILE *" isn't
+ * necessarily available.
+ */
+void PNGAPI
+png_init_io(png_structp png_ptr, png_FILE_p fp)
+{
+   png_debug(1, "in png_init_io\n");
+   /* remember the stream as this handle's I/O pointer */
+   if (png_ptr != NULL)
+      png_ptr->io_ptr = (png_voidp)fp;
+}
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+/* Convert the supplied time into an RFC 1123 string suitable for use in
+ * a "Creation Time" or other text-based time string.
+ */
+png_charp PNGAPI
+png_convert_to_rfc1123(png_structp png_ptr, png_timep ptime)
+{
+   static PNG_CONST char short_months[12][4] =
+        {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+   /* month index is computed unsigned so month == 0 cannot go negative */
+   if(png_ptr == NULL) return (NULL);
+   if (png_ptr->time_buffer == NULL)
+   {
+      png_ptr->time_buffer = (png_charp)png_malloc(png_ptr, (png_uint_32)(29*
+         png_sizeof(char)));
+   }
+
+#if defined(_WIN32_WCE)
+   {
+      wchar_t time_buf[29];
+      wsprintf(time_buf, TEXT("%d %S %d %02d:%02d:%02d +0000"),
+          ptime->day % 32, short_months[(ptime->month - 1U) % 12],
+        ptime->year, ptime->hour % 24, ptime->minute % 60,
+          ptime->second % 61);
+      WideCharToMultiByte(CP_ACP, 0, time_buf, -1, png_ptr->time_buffer, 29,
+          NULL, NULL);
+   }
+#else
+#ifdef USE_FAR_KEYWORD
+   {
+      char near_time_buf[29];
+      png_snprintf6(near_time_buf,29,"%d %s %d %02d:%02d:%02d +0000",
+          ptime->day % 32, short_months[(ptime->month - 1U) % 12],
+          ptime->year, ptime->hour % 24, ptime->minute % 60,
+          ptime->second % 61);
+      png_memcpy(png_ptr->time_buffer, near_time_buf,
+          29*png_sizeof(char));
+   }
+#else
+   png_snprintf6(png_ptr->time_buffer,29,"%d %s %d %02d:%02d:%02d +0000",
+       ptime->day % 32, short_months[(ptime->month - 1U) % 12],
+       ptime->year, ptime->hour % 24, ptime->minute % 60,
+       ptime->second % 61);
+#endif
+#endif /* _WIN32_WCE */
+   return ((png_charp)png_ptr->time_buffer);
+}
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+png_charp PNGAPI
+png_get_copyright(png_structp png_ptr)
+{
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) "\n libpng version 1.2.29 - May 8, 2008\n\
+   Copyright (c) 1998-2008 Glenn Randers-Pehrson\n\
+   Copyright (c) 1996-1997 Andreas Dilger\n\
+   Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.\n");
+}
+
+/* The following return the library version as a short string in the
+ * format 1.0.0 through 99.99.99zz.  To get the version of *.h files
+ * used with your application, print out PNG_LIBPNG_VER_STRING, which
+ * is defined in png.h.
+ * Note: now there is no difference between png_get_libpng_ver() and
+ * png_get_header_ver().  Due to the version_nn_nn_nn typedef guard,
+ * it is guaranteed that png.c uses the correct version of png.h.
+ */
+png_charp PNGAPI
+png_get_libpng_ver(png_structp png_ptr)
+{
+   /* Reports the version string the library sources were built with */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
+}
+
+png_charp PNGAPI
+png_get_header_ver(png_structp png_ptr)
+{
+   /* Reports the version string of the headers used for the build */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
+}
+
+png_charp PNGAPI
+png_get_header_version(png_structp png_ptr)
+{
+   /* Longer form: version plus date, newline-terminated */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_HEADER_VERSION_STRING
+#ifndef PNG_READ_SUPPORTED
+   "     (NO READ SUPPORT)"
+#endif
+   "\n");
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+int PNGAPI
+png_handle_as_unknown(png_structp png_ptr, png_bytep chunk_name)
+{
+   /* chunk_list: 5-byte records (4-byte name + "keep" byte); last match wins */
+   int idx;
+   if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list <= 0)
+      return 0;
+   for (idx = png_ptr->num_chunk_list - 1; idx >= 0; idx--)
+   {
+      png_bytep entry = png_ptr->chunk_list + idx*5;
+      if (png_memcmp(chunk_name, entry, 4) == 0) return ((int)entry[4]);
+   }
+   return 0;
+}
+#endif
+
+/* This function, added to libpng-1.0.6g, is untested. */
+int PNGAPI
+png_reset_zstream(png_structp png_ptr)
+{
+   /* without a handle there is no zlib stream to reset */
+   return (png_ptr == NULL) ? Z_STREAM_ERROR : inflateReset(&png_ptr->zstream);
+}
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+/* This function was added to libpng-1.0.7; it returns PNG_LIBPNG_VER as a png_uint_32 */
+png_uint_32 PNGAPI
+png_access_version_number(void)
+{
+   /* Version of *.c files used when building libpng */
+   return((png_uint_32) PNG_LIBPNG_VER);
+}
+
+
+#if defined(PNG_READ_SUPPORTED) && defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if !defined(PNG_1_0_X)
+/* this function was added to libpng 1.2.0; here it unconditionally returns -1 */
+int PNGAPI
+png_mmx_support(void)
+{
+   /* obsolete, to be removed from libpng-1.4.0 */
+    return -1;
+}
+#endif /* PNG_1_0_X */
+#endif /* PNG_READ_SUPPORTED && PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#ifdef PNG_SIZE_T
+/* Added at libpng version 1.2.6: convert a size_t to png_size_t, aborting if it is too large */
+   PNG_EXTERN png_size_t PNGAPI png_convert_size PNGARG((size_t size));
+png_size_t PNGAPI
+png_convert_size(size_t size)
+{
+  if (size > (png_size_t)-1)  /* presumably the largest png_size_t value -- assumes an unsigned type */
+     PNG_ABORT();  /* We haven't got access to png_ptr, so no png_error() */
+  return ((png_size_t)size);
+}
+#endif /* PNG_SIZE_T */
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
diff --git a/PNG/png.h b/PNG/png.h
new file mode 100644
index 0000000..c07407f
--- /dev/null
+++ b/PNG/png.h
@@ -0,0 +1,3569 @@
+/* png.h - header file for PNG reference library
+ *
+ * libpng version 1.2.29 - May 8, 2008
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * Authors and maintainers:
+ *  libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
+ *  libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
+ *  libpng versions 0.97, January 1998, through 1.2.29 - May 8, 2008: Glenn
+ *  See also "Contributing Authors", below.
+ *
+ * Note about libpng version numbers:
+ *
+ *    Due to various miscommunications, unforeseen code incompatibilities
+ *    and occasional factors outside the authors' control, version numbering
+ *    on the library has not always been consistent and straightforward.
+ *    The following table summarizes matters since version 0.89c, which was
+ *    the first widely used release:
+ *
+ *    source                 png.h  png.h  shared-lib
+ *    version                string   int  version
+ *    -------                ------ -----  ----------
+ *    0.89c "1.0 beta 3"     0.89      89  1.0.89
+ *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
+ *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
+ *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
+ *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
+ *    0.97c                  0.97      97  2.0.97
+ *    0.98                   0.98      98  2.0.98
+ *    0.99                   0.99      98  2.0.99
+ *    0.99a-m                0.99      99  2.0.99
+ *    1.00                   1.00     100  2.1.0 [100 should be 10000]
+ *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
+ *    1.0.1       png.h string is   10001  2.1.0
+ *    1.0.1a-e    identical to the  10002  from here on, the shared library
+ *    1.0.2       source version)   10002  is 2.V where V is the source code
+ *    1.0.2a-b                      10003  version, except as noted.
+ *    1.0.3                         10003
+ *    1.0.3a-d                      10004
+ *    1.0.4                         10004
+ *    1.0.4a-f                      10005
+ *    1.0.5 (+ 2 patches)           10005
+ *    1.0.5a-d                      10006
+ *    1.0.5e-r                      10100 (not source compatible)
+ *    1.0.5s-v                      10006 (not binary compatible)
+ *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
+ *    1.0.6d-f                      10007 (still binary incompatible)
+ *    1.0.6g                        10007
+ *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
+ *    1.0.6i                        10007  10.6i
+ *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
+ *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
+ *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
+ *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
+ *    1.0.7                    1    10007  (still compatible)
+ *    1.0.8beta1-4             1    10008  2.1.0.8beta1-4
+ *    1.0.8rc1                 1    10008  2.1.0.8rc1
+ *    1.0.8                    1    10008  2.1.0.8
+ *    1.0.9beta1-6             1    10009  2.1.0.9beta1-6
+ *    1.0.9rc1                 1    10009  2.1.0.9rc1
+ *    1.0.9beta7-10            1    10009  2.1.0.9beta7-10
+ *    1.0.9rc2                 1    10009  2.1.0.9rc2
+ *    1.0.9                    1    10009  2.1.0.9
+ *    1.0.10beta1              1    10010  2.1.0.10beta1
+ *    1.0.10rc1                1    10010  2.1.0.10rc1
+ *    1.0.10                   1    10010  2.1.0.10
+ *    1.0.11beta1-3            1    10011  2.1.0.11beta1-3
+ *    1.0.11rc1                1    10011  2.1.0.11rc1
+ *    1.0.11                   1    10011  2.1.0.11
+ *    1.0.12beta1-2            2    10012  2.1.0.12beta1-2
+ *    1.0.12rc1                2    10012  2.1.0.12rc1
+ *    1.0.12                   2    10012  2.1.0.12
+ *    1.1.0a-f                 -    10100  2.1.1.0a-f (branch abandoned)
+ *    1.2.0beta1-2             2    10200  2.1.2.0beta1-2
+ *    1.2.0beta3-5             3    10200  3.1.2.0beta3-5
+ *    1.2.0rc1                 3    10200  3.1.2.0rc1
+ *    1.2.0                    3    10200  3.1.2.0
+ *    1.2.1beta1-4             3    10201  3.1.2.1beta1-4
+ *    1.2.1rc1-2               3    10201  3.1.2.1rc1-2
+ *    1.2.1                    3    10201  3.1.2.1
+ *    1.2.2beta1-6            12    10202  12.so.0.1.2.2beta1-6
+ *    1.0.13beta1             10    10013  10.so.0.1.0.13beta1
+ *    1.0.13rc1               10    10013  10.so.0.1.0.13rc1
+ *    1.2.2rc1                12    10202  12.so.0.1.2.2rc1
+ *    1.0.13                  10    10013  10.so.0.1.0.13
+ *    1.2.2                   12    10202  12.so.0.1.2.2
+ *    1.2.3rc1-6              12    10203  12.so.0.1.2.3rc1-6
+ *    1.2.3                   12    10203  12.so.0.1.2.3
+ *    1.2.4beta1-3            13    10204  12.so.0.1.2.4beta1-3
+ *    1.0.14rc1               13    10014  10.so.0.1.0.14rc1
+ *    1.2.4rc1                13    10204  12.so.0.1.2.4rc1
+ *    1.0.14                  10    10014  10.so.0.1.0.14
+ *    1.2.4                   13    10204  12.so.0.1.2.4
+ *    1.2.5beta1-2            13    10205  12.so.0.1.2.5beta1-2
+ *    1.0.15rc1-3             10    10015  10.so.0.1.0.15rc1-3
+ *    1.2.5rc1-3              13    10205  12.so.0.1.2.5rc1-3
+ *    1.0.15                  10    10015  10.so.0.1.0.15
+ *    1.2.5                   13    10205  12.so.0.1.2.5
+ *    1.2.6beta1-4            13    10206  12.so.0.1.2.6beta1-4
+ *    1.0.16                  10    10016  10.so.0.1.0.16
+ *    1.2.6                   13    10206  12.so.0.1.2.6
+ *    1.2.7beta1-2            13    10207  12.so.0.1.2.7beta1-2
+ *    1.0.17rc1               10    10017  10.so.0.1.0.17rc1
+ *    1.2.7rc1                13    10207  12.so.0.1.2.7rc1
+ *    1.0.17                  10    10017  10.so.0.1.0.17
+ *    1.2.7                   13    10207  12.so.0.1.2.7
+ *    1.2.8beta1-5            13    10208  12.so.0.1.2.8beta1-5
+ *    1.0.18rc1-5             10    10018  10.so.0.1.0.18rc1-5
+ *    1.2.8rc1-5              13    10208  12.so.0.1.2.8rc1-5
+ *    1.0.18                  10    10018  10.so.0.1.0.18
+ *    1.2.8                   13    10208  12.so.0.1.2.8
+ *    1.2.9beta1-3            13    10209  12.so.0.1.2.9beta1-3
+ *    1.2.9beta4-11           13    10209  12.so.0.9[.0]
+ *    1.2.9rc1                13    10209  12.so.0.9[.0]
+ *    1.2.9                   13    10209  12.so.0.9[.0]
+ *    1.2.10beta1-8           13    10210  12.so.0.10[.0]
+ *    1.2.10rc1-3             13    10210  12.so.0.10[.0]
+ *    1.2.10                  13    10210  12.so.0.10[.0]
+ *    1.2.11beta1-4           13    10211  12.so.0.11[.0]
+ *    1.0.19rc1-5             10    10019  10.so.0.19[.0]
+ *    1.2.11rc1-5             13    10211  12.so.0.11[.0]
+ *    1.0.19                  10    10019  10.so.0.19[.0]
+ *    1.2.11                  13    10211  12.so.0.11[.0]
+ *    1.0.20                  10    10020  10.so.0.20[.0]
+ *    1.2.12                  13    10212  12.so.0.12[.0]
+ *    1.2.13beta1             13    10213  12.so.0.13[.0]
+ *    1.0.21                  10    10021  10.so.0.21[.0]
+ *    1.2.13                  13    10213  12.so.0.13[.0]
+ *    1.2.14beta1-2           13    10214  12.so.0.14[.0]
+ *    1.0.22rc1               10    10022  10.so.0.22[.0]
+ *    1.2.14rc1               13    10214  12.so.0.14[.0]
+ *    1.0.22                  10    10022  10.so.0.22[.0]
+ *    1.2.14                  13    10214  12.so.0.14[.0]
+ *    1.2.15beta1-6           13    10215  12.so.0.15[.0]
+ *    1.0.23rc1-5             10    10023  10.so.0.23[.0]
+ *    1.2.15rc1-5             13    10215  12.so.0.15[.0]
+ *    1.0.23                  10    10023  10.so.0.23[.0]
+ *    1.2.15                  13    10215  12.so.0.15[.0]
+ *    1.2.16beta1-2           13    10216  12.so.0.16[.0]
+ *    1.2.16rc1               13    10216  12.so.0.16[.0]
+ *    1.0.24                  10    10024  10.so.0.24[.0]
+ *    1.2.16                  13    10216  12.so.0.16[.0]
+ *    1.2.17beta1-2           13    10217  12.so.0.17[.0]
+ *    1.0.25rc1               10    10025  10.so.0.25[.0]
+ *    1.2.17rc1-3             13    10217  12.so.0.17[.0]
+ *    1.0.25                  10    10025  10.so.0.25[.0]
+ *    1.2.17                  13    10217  12.so.0.17[.0]
+ *    1.0.26                  10    10026  10.so.0.26[.0]
+ *    1.2.18                  13    10218  12.so.0.18[.0]
+ *    1.2.19beta1-31          13    10219  12.so.0.19[.0]
+ *    1.0.27rc1-6             10    10027  10.so.0.27[.0]
+ *    1.2.19rc1-6             13    10219  12.so.0.19[.0]
+ *    1.0.27                  10    10027  10.so.0.27[.0]
+ *    1.2.19                  13    10219  12.so.0.19[.0]
+ *    1.2.20beta01-04         13    10220  12.so.0.20[.0]
+ *    1.0.28rc1-6             10    10028  10.so.0.28[.0]
+ *    1.2.20rc1-6             13    10220  12.so.0.20[.0]
+ *    1.0.28                  10    10028  10.so.0.28[.0]
+ *    1.2.20                  13    10220  12.so.0.20[.0]
+ *    1.2.21beta1-2           13    10221  12.so.0.21[.0]
+ *    1.2.21rc1-3             13    10221  12.so.0.21[.0]
+ *    1.0.29                  10    10029  10.so.0.29[.0]
+ *    1.2.21                  13    10221  12.so.0.21[.0]
+ *    1.2.22beta1-4           13    10222  12.so.0.22[.0]
+ *    1.0.30rc1               10    10030  10.so.0.30[.0]
+ *    1.2.22rc1               13    10222  12.so.0.22[.0]
+ *    1.0.30                  10    10030  10.so.0.30[.0]
+ *    1.2.22                  13    10222  12.so.0.22[.0]
+ *    1.2.23beta01-05         13    10223  12.so.0.23[.0]
+ *    1.2.23rc01              13    10223  12.so.0.23[.0]
+ *    1.2.23                  13    10223  12.so.0.23[.0]
+ *    1.2.24beta01-02         13    10224  12.so.0.24[.0]
+ *    1.2.24rc01              13    10224  12.so.0.24[.0]
+ *    1.2.24                  13    10224  12.so.0.24[.0]
+ *    1.2.25beta01-06         13    10225  12.so.0.25[.0]
+ *    1.2.25rc01-02           13    10225  12.so.0.25[.0]
+ *    1.0.31                  10    10031  10.so.0.31[.0]
+ *    1.2.25                  13    10225  12.so.0.25[.0]
+ *    1.2.26beta01-06         13    10226  12.so.0.26[.0]
+ *    1.2.26rc01              13    10226  12.so.0.26[.0]
+ *    1.2.26                  13    10226  12.so.0.26[.0]
+ *    1.0.32                  10    10032  10.so.0.32[.0]
+ *    1.2.27beta01-06         13    10227  12.so.0.27[.0]
+ *    1.2.27rc01              13    10227  12.so.0.27[.0]
+ *    1.0.33                  10    10033  10.so.0.33[.0]
+ *    1.2.27                  13    10227  12.so.0.27[.0]
+ *    1.0.34                  10    10034  10.so.0.34[.0]
+ *    1.2.28                  13    10228  12.so.0.28[.0]
+ *    1.2.29beta01-03         13    10229  12.so.0.29[.0]
+ *    1.2.29rc01              13    10229  12.so.0.29[.0]
+ *    1.0.35                  10    10035  10.so.0.35[.0]
+ *    1.2.29                  13    10229  12.so.0.29[.0]
+ *
+ *    Henceforth the source version will match the shared-library major
+ *    and minor numbers; the shared-library major version number will be
+ *    used for changes in backward compatibility, as it is intended.  The
+ *    PNG_LIBPNG_VER macro, which is not used within libpng but is available
+ *    for applications, is an unsigned integer of the form xyyzz corresponding
+ *    to the source version x.y.z (leading zeros in y and z).  Beta versions
+ *    were given the previous public release number plus a letter, until
+ *    version 1.0.6j; from then on they were given the upcoming public
+ *    release number plus "betaNN" or "rcNN".
+ *
+ *    Binary incompatibility exists only when applications make direct access
+ *    to the info_ptr or png_ptr members through png.h, and the compiled
+ *    application is loaded with a different version of the library.
+ *
+ *    DLLNUM will change each time there are forward or backward changes
+ *    in binary compatibility (e.g., when a new feature is added).
+ *
+ * See libpng.txt or libpng.3 for more information.  The PNG specification
+ * is available as a W3C Recommendation and as an ISO Specification,
+ * <http://www.w3.org/TR/2003/REC-PNG-20031110/>
+ */
+
+/*
+ * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
+ *
+ * If you modify libpng you may insert additional notices immediately following
+ * this sentence.
+ *
+ * libpng versions 1.2.6, August 15, 2004, through 1.2.29, May 8, 2008, are
+ * Copyright (c) 2004, 2006-2008 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.2.5
+ * with the following individual added to the list of Contributing Authors:
+ *
+ *    Cosmin Truta
+ *
+ * libpng versions 1.0.7, July 1, 2000, through 1.2.5, October 3, 2002, are
+ * Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.0.6
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Simon-Pierre Cadieux
+ *    Eric S. Raymond
+ *    Gilles Vollant
+ *
+ * and with the following additions to the disclaimer:
+ *
+ *    There is no warranty against interference with your enjoyment of the
+ *    library or against infringement.  There is no warranty that our
+ *    efforts or the library will fulfill any of your particular purposes
+ *    or needs.  This library is provided with all faults, and the entire
+ *    risk of satisfactory quality, performance, accuracy, and effort is with
+ *    the user.
+ *
+ * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
+ * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-0.96,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Tom Lane
+ *    Glenn Randers-Pehrson
+ *    Willem van Schaik
+ *
+ * libpng versions 0.89, June 1996, through 0.96, May 1997, are
+ * Copyright (c) 1996, 1997 Andreas Dilger
+ * Distributed according to the same disclaimer and license as libpng-0.88,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    John Bowler
+ *    Kevin Bracey
+ *    Sam Bushell
+ *    Magnus Holmgren
+ *    Greg Roelofs
+ *    Tom Tanner
+ *
+ * libpng versions 0.5, May 1995, through 0.88, January 1996, are
+ * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * For the purposes of this copyright and license, "Contributing Authors"
+ * is defined as the following set of individuals:
+ *
+ *    Andreas Dilger
+ *    Dave Martindale
+ *    Guy Eric Schalnat
+ *    Paul Schmidt
+ *    Tim Wegner
+ *
+ * The PNG Reference Library is supplied "AS IS".  The Contributing Authors
+ * and Group 42, Inc. disclaim all warranties, expressed or implied,
+ * including, without limitation, the warranties of merchantability and of
+ * fitness for any purpose.  The Contributing Authors and Group 42, Inc.
+ * assume no liability for direct, indirect, incidental, special, exemplary,
+ * or consequential damages, which may result from the use of the PNG
+ * Reference Library, even if advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * source code, or portions hereof, for any purpose, without fee, subject
+ * to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ *
+ * 2. Altered versions must be plainly marked as such and
+ * must not be misrepresented as being the original source.
+ *
+ * 3. This Copyright notice may not be removed or altered from
+ *    any source or altered source distribution.
+ *
+ * The Contributing Authors and Group 42, Inc. specifically permit, without
+ * fee, and encourage the use of this source code as a component to
+ * supporting the PNG file format in commercial products.  If you use this
+ * source code in a product, acknowledgment is not required but would be
+ * appreciated.
+ */
+
+/*
+ * A "png_get_copyright" function is available, for convenient use in "about"
+ * boxes and the like:
+ *
+ * printf("%s",png_get_copyright(NULL));
+ *
+ * Also, the PNG logo (in PNG format, of course) is supplied in the
+ * files "pngbar.png" and "pngbar.jpg" (88x31) and "pngnow.png" (98x31).
+ */
+
+/*
+ * Libpng is OSI Certified Open Source Software.  OSI Certified is a
+ * certification mark of the Open Source Initiative.
+ */
+
+/*
+ * The contributing authors would like to thank all those who helped
+ * with testing, bug fixes, and patience.  This wouldn't have been
+ * possible without all of you.
+ *
+ * Thanks to Frank J. T. Wojcik for helping with the documentation.
+ */
+
+/*
+ * Y2K compliance in libpng:
+ * =========================
+ *
+ *    May 8, 2008
+ *
+ *    Since the PNG Development group is an ad-hoc body, we can't make
+ *    an official declaration.
+ *
+ *    This is your unofficial assurance that libpng versions 0.71 through
+ *    1.2.29 are Y2K compliant.  It is my belief that earlier
+ *    versions were also Y2K compliant.
+ *
+ *    Libpng only has three year fields.  One is a 2-byte unsigned integer
+ *    that will hold years up to 65535.  The other two hold the date in text
+ *    format, and will hold years up to 9999.
+ *
+ *    The integer is
+ *        "png_uint_16 year" in png_time_struct.
+ *
+ *    The strings are
+ *        "png_charp time_buffer" in png_struct and
+ *        "near_time_buffer", which is a local character string in png.c.
+ *
+ *    There are seven time-related functions:
+ *        png_convert_to_rfc_1123() in png.c
+ *          (formerly png_convert_to_rfc_1152() in error)
+ *        png_convert_from_struct_tm() in pngwrite.c, called in pngwrite.c
+ *        png_convert_from_time_t() in pngwrite.c
+ *        png_get_tIME() in pngget.c
+ *        png_handle_tIME() in pngrutil.c, called in pngread.c
+ *        png_set_tIME() in pngset.c
+ *        png_write_tIME() in pngwutil.c, called in pngwrite.c
+ *
+ *    All handle dates properly in a Y2K environment.  The
+ *    png_convert_from_time_t() function calls gmtime() to convert from system
+ *    clock time, which returns (year - 1900), which we properly convert to
+ *    the full 4-digit year.  There is a possibility that applications using
+ *    libpng are not passing 4-digit years into the png_convert_to_rfc_1123()
+ *    function, or that they are incorrectly passing only a 2-digit year
+ *    instead of "year - 1900" into the png_convert_from_struct_tm() function,
+ *    but this is not under our control.  The libpng documentation has always
+ *    stated that it works with 4-digit years, and the APIs have been
+ *    documented as such.
+ *
+ *    The tIME chunk itself is also Y2K compliant.  It uses a 2-byte unsigned
+ *    integer to hold the year, and can hold years as large as 65535.
+ *
+ *    zlib, upon which libpng depends, is also Y2K compliant.  It contains
+ *    no date-related code.
+ *
+ *       Glenn Randers-Pehrson
+ *       libpng maintainer
+ *       PNG Development Group
+ */
+
+#ifndef PNG_H
+#define PNG_H
+
+/* This is not the place to learn how to use libpng.  The file libpng.txt
+ * describes how to use libpng, and the file example.c summarizes it
+ * with some code on which to build.  This file is useful for looking
+ * at the actual function definitions and structure components.
+ */
+
+/* Version information for png.h - this should match the version in png.c */
+#define PNG_LIBPNG_VER_STRING "1.2.29"
+#define PNG_HEADER_VERSION_STRING \
+   " libpng version 1.2.29 - May 8, 2008\n"
+
+#define PNG_LIBPNG_VER_SONUM   0
+#define PNG_LIBPNG_VER_DLLNUM  13 /* DLLNUM column of the version table above */
+
+/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
+#define PNG_LIBPNG_VER_MAJOR   1
+#define PNG_LIBPNG_VER_MINOR   2
+#define PNG_LIBPNG_VER_RELEASE 29
+/* This should match the numeric part of the final component of
+ * PNG_LIBPNG_VER_STRING, omitting any leading zero: */
+
+#define PNG_LIBPNG_VER_BUILD  0
+
+/* Release Status (the values fit in PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK) */
+#define PNG_LIBPNG_BUILD_ALPHA    1
+#define PNG_LIBPNG_BUILD_BETA     2
+#define PNG_LIBPNG_BUILD_RC       3
+#define PNG_LIBPNG_BUILD_STABLE   4
+#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7
+
+/* Release-Specific Flags (bits above the release status mask) */
+#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
+                                       PNG_LIBPNG_BUILD_STABLE only */
+#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_SPECIAL */
+#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_PRIVATE */
+
+#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
+
+/* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
+ * We must not include leading zeros.
+ * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only
+ * version 1.0.0 was mis-numbered 100 instead of 10000).  From
+ * version 1.0.1 it's xxyyzz, where x=major, y=minor, z=release */
+#define PNG_LIBPNG_VER 10229 /* 1.2.29 */
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* include the compression library's header */
+#include "ZLIB/zlib.h"
+#endif
+
+/* include all user configurable info, including optional assembler routines */
+#include "pngconf.h"
+
+/* Added at libpng-1.2.8: PNG_LIBPNG_BUILD_TYPE is the base release status
+ * OR'ed with the private or special build flag selected below. */
+/* Ref MSDN: Private has priority over Special
+ * VS_FF_PRIVATEBUILD File *was not* built using standard release
+ * procedures. If this value is given, the StringFileInfo block must
+ * contain a PrivateBuild string.
+ *
+ * VS_FF_SPECIALBUILD File *was* built by the original company using
+ * standard release procedures but is a variation of the standard
+ * file of the same version number. If this value is given, the
+ * StringFileInfo block must contain a SpecialBuild string.
+ */
+
+#if defined(PNG_USER_PRIVATEBUILD)
+#  define PNG_LIBPNG_BUILD_TYPE \
+          (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
+#else /* not a private build: check for a special build next */
+#  if defined(PNG_LIBPNG_SPECIALBUILD)
+#    define PNG_LIBPNG_BUILD_TYPE \
+            (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
+#  else /* neither flag: the base release status alone */
+#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
+#  endif /* PNG_LIBPNG_SPECIALBUILD */
+#endif /* PNG_USER_PRIVATEBUILD */
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* This file is arranged in several sections.  The first section contains
+ * structure and type definitions.  The second section contains the external
+ * library functions, while the third has the internal library functions,
+ * which applications aren't expected to use directly.
+ */
+
+#ifndef PNG_NO_TYPECAST_NULL /* default: NULL cast to each pointer type */
+#define int_p_NULL                (int *)NULL
+#define png_bytep_NULL            (png_bytep)NULL
+#define png_bytepp_NULL           (png_bytepp)NULL
+#define png_doublep_NULL          (png_doublep)NULL
+#define png_error_ptr_NULL        (png_error_ptr)NULL
+#define png_flush_ptr_NULL        (png_flush_ptr)NULL
+#define png_free_ptr_NULL         (png_free_ptr)NULL
+#define png_infopp_NULL           (png_infopp)NULL
+#define png_malloc_ptr_NULL       (png_malloc_ptr)NULL
+#define png_read_status_ptr_NULL  (png_read_status_ptr)NULL
+#define png_rw_ptr_NULL           (png_rw_ptr)NULL
+#define png_structp_NULL          (png_structp)NULL
+#define png_uint_16p_NULL         (png_uint_16p)NULL
+#define png_voidp_NULL            (png_voidp)NULL
+#define png_write_status_ptr_NULL (png_write_status_ptr)NULL
+#else /* PNG_NO_TYPECAST_NULL: same names, plain uncast NULL */
+#define int_p_NULL                NULL
+#define png_bytep_NULL            NULL
+#define png_bytepp_NULL           NULL
+#define png_doublep_NULL          NULL
+#define png_error_ptr_NULL        NULL
+#define png_flush_ptr_NULL        NULL
+#define png_free_ptr_NULL         NULL
+#define png_infopp_NULL           NULL
+#define png_malloc_ptr_NULL       NULL
+#define png_read_status_ptr_NULL  NULL
+#define png_rw_ptr_NULL           NULL
+#define png_structp_NULL          NULL
+#define png_uint_16p_NULL         NULL
+#define png_voidp_NULL            NULL
+#define png_write_status_ptr_NULL NULL
+#endif /* PNG_NO_TYPECAST_NULL */
+
+/* variables declared in png.c - only it needs to define PNG_NO_EXTERN */
+#if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
+/* Version information for C files, stored in png.c.  This had better match
+ * the version above.
+ */
+#ifdef PNG_USE_GLOBAL_ARRAYS
+PNG_EXPORT_VAR (PNG_CONST char) png_libpng_ver[18];
+  /* need room for 99.99.99beta99z */
+#else /* no global arrays: the name maps to a png_get_header_ver(NULL) call */
+#define png_libpng_ver png_get_header_ver(NULL)
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+/* This was removed in version 1.0.5c */
+/* Seven-entry tables to facilitate easy interlacing.  See png.c for details */
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_start[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_inc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_ystart[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_yinc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_mask[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_dsp_mask[7];
+/* This isn't currently used.  If you need it, see png.c for more details.
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_height[7];
+*/
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+#endif /* !PNG_NO_EXTERN || PNG_ALWAYS_EXTERN */
+
+/* Three color definitions.  The order of the red, green, and blue fields (and
+ * the exact size) is not important, although the size of each field needs to
+ * be png_byte or png_uint_16 (as defined below).
+ */
+typedef struct png_color_struct
+{
+   png_byte red;
+   png_byte green;
+   png_byte blue;
+} png_color;                               /* one png_byte per component */
+typedef png_color FAR * png_colorp;        /* pointer to png_color */
+typedef png_color FAR * FAR * png_colorpp; /* pointer to png_colorp */
+
+typedef struct png_color_16_struct
+{
+   png_byte index;    /* used for palette files */
+   png_uint_16 red;   /* for use in red green blue files */
+   png_uint_16 green;
+   png_uint_16 blue;
+   png_uint_16 gray;  /* for use in grayscale files */
+} png_color_16;     /* palette index plus png_uint_16 color/gray values */
+typedef png_color_16 FAR * png_color_16p;        /* pointer to png_color_16 */
+typedef png_color_16 FAR * FAR * png_color_16pp; /* pointer to png_color_16p */
+
+typedef struct png_color_8_struct
+{
+   png_byte red;   /* for use in red green blue files */
+   png_byte green;
+   png_byte blue;
+   png_byte gray;  /* for use in grayscale files */
+   png_byte alpha; /* for alpha channel files */
+} png_color_8;     /* one png_byte each for red, green, blue, gray, alpha */
+typedef png_color_8 FAR * png_color_8p;        /* pointer to png_color_8 */
+typedef png_color_8 FAR * FAR * png_color_8pp; /* pointer to png_color_8p */
+
+/*
+ * The following two structures are used for the in-core representation
+ * of sPLT chunks.
+ */
+typedef struct png_sPLT_entry_struct
+{
+   png_uint_16 red;
+   png_uint_16 green;
+   png_uint_16 blue;
+   png_uint_16 alpha;
+   png_uint_16 frequency;
+} png_sPLT_entry;      /* element type of png_sPLT_t.entries */
+typedef png_sPLT_entry FAR * png_sPLT_entryp;        /* pointer to an entry */
+typedef png_sPLT_entry FAR * FAR * png_sPLT_entrypp; /* ptr to png_sPLT_entryp */
+
+/*  When the depth of the sPLT palette is 8 bits, the color and alpha samples
+ *  occupy the LSB of their respective members, and the MSB of each member
+ *  is zero-filled.  The frequency member always occupies the full 16 bits.
+ */
+
+typedef struct png_sPLT_struct
+{
+   png_charp name;           /* palette name */
+   png_byte depth;           /* depth of palette samples */
+   png_sPLT_entryp entries;  /* palette entries */
+   png_int_32 nentries;      /* number of palette entries */
+} png_sPLT_t;                /* a named palette: entries plus their count */
+typedef png_sPLT_t FAR * png_sPLT_tp;        /* pointer to png_sPLT_t */
+typedef png_sPLT_t FAR * FAR * png_sPLT_tpp; /* pointer to png_sPLT_tp */
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
+ * and whether those contents are compressed or not.  The "key" field
+ * points to a regular zero-terminated C string.  The "text", "lang", and
+ * "lang_key" fields can be regular C strings, empty strings, or NULL pointers.
+ * However, the structure returned by png_get_text() will always contain
+ * regular zero-terminated C strings (possibly empty), never NULL pointers,
+ * so they can be safely used in printf() and other string-handling functions.
+ */
+typedef struct png_text_struct
+{
+   int  compression;       /* compression value:
+                             -1: tEXt, none
+                              0: zTXt, deflate
+                              1: iTXt, none
+                              2: iTXt, deflate  */
+   png_charp key;          /* keyword, 1-79 character description of "text" */
+   png_charp text;         /* comment, may be an empty string (ie "")
+                              or a NULL pointer */
+   png_size_t text_length; /* length of the text string */
+#ifdef PNG_iTXt_SUPPORTED
+   png_size_t itxt_length; /* length of the itxt string */
+   png_charp lang;         /* language code, 0-79 characters
+                              or a NULL pointer */
+   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
+                              chars or a NULL pointer */
+#endif /* PNG_iTXt_SUPPORTED: the three members above exist only with iTXt */
+} png_text;
+typedef png_text FAR * png_textp;        /* pointer to png_text */
+typedef png_text FAR * FAR * png_textpp; /* pointer to png_textp */
+#endif /* PNG_TEXT_SUPPORTED */
+
+/* Supported compression types for text in PNG files (tEXt, zTXt, and iTXt).
+ * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
+#define PNG_TEXT_COMPRESSION_NONE_WR -3
+#define PNG_TEXT_COMPRESSION_zTXt_WR -2
+#define PNG_TEXT_COMPRESSION_NONE    -1  /* tEXt, none */
+#define PNG_TEXT_COMPRESSION_zTXt     0  /* zTXt, deflate */
+#define PNG_ITXT_COMPRESSION_NONE     1  /* iTXt, none */
+#define PNG_ITXT_COMPRESSION_zTXt     2  /* iTXt, deflate */
+#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
+
+/* png_time is a way to hold the time in a machine-independent way.
+ * Two conversions are provided, both from time_t and struct tm.  There
+ * is no portable way to convert to either of these structures, as far
+ * as I know.  If you know of a portable way, send it to me.  As a side
+ * note - PNG has always been Year 2000 compliant!
+ */
+typedef struct png_time_struct
+{
+   png_uint_16 year; /* full year, as in, 1995 */
+   png_byte month;   /* month of year, 1 - 12 */
+   png_byte day;     /* day of month, 1 - 31 */
+   png_byte hour;    /* hour of day, 0 - 23 */
+   png_byte minute;  /* minute of hour, 0 - 59 */
+   png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
+} png_time;
+typedef png_time FAR * png_timep;        /* pointer to png_time */
+typedef png_time FAR * FAR * png_timepp; /* pointer to png_timep */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* png_unknown_chunk is a structure to hold queued chunks for which there is
+ * no specific support.  The idea is that we can use this to queue
+ * up private chunks for output even though the library doesn't actually
+ * know about their semantics.
+ */
+#define PNG_CHUNK_NAME_LENGTH 5 /* NOTE(review): presumably 4 chars + NUL */
+typedef struct png_unknown_chunk_t
+{
+    png_byte name[PNG_CHUNK_NAME_LENGTH];
+    png_byte *data;
+    png_size_t size;
+
+    /* libpng-using applications should NOT directly modify this byte. */
+    png_byte location; /* mode of operation at read time */
+}
+png_unknown_chunk;
+typedef png_unknown_chunk FAR * png_unknown_chunkp; /* ptr to one chunk */
+typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp;
+#endif /* PNG_UNKNOWN_CHUNKS_SUPPORTED */
+
+/* png_info is a structure that holds the information in a PNG file so
+ * that the application can find out the characteristics of the image.
+ * If you are reading the file, this structure will tell you what is
+ * in the PNG file.  If you are writing the file, fill in the information
+ * you want to put into the PNG file, then call png_write_info().
+ * The names chosen should be very close to the PNG specification, so
+ * consult that document for information about the meaning of each field.
+ *
+ * With libpng < 0.95, it was only possible to directly set and read the
+ * values in the png_info_struct, which meant that the contents and
+ * order of the values had to remain fixed.  With libpng 0.95 and later,
+ * however, there are now functions that abstract the contents of
+ * png_info_struct from the application, so this makes it easier to use
+ * libpng with dynamic libraries, and even makes it possible to use
+ * libraries that don't have all of the libpng ancillary chunk-handling
+ * functionality.
+ *
+ * In any case, the order of the parameters in png_info_struct should NOT
+ * be changed for as long as possible to keep compatibility with applications
+ * that use the old direct-access method with png_info_struct.
+ *
+ * The following members may have allocated storage attached that should be
+ * cleaned up before the structure is discarded: palette, trans, text,
+ * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
+ * splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
+ * are automatically freed when the info structure is deallocated, if they were
+ * allocated internally by libpng.  This behavior can be changed by means
+ * of the png_data_freer() function.
+ *
+ * More allocation details: all the chunk-reading functions that
+ * change these members go through the corresponding png_set_*
+ * functions.  A function to clear these members is available: see
+ * png_free_data().  The png_set_* functions do not depend on being
+ * able to point info structure members to any of the storage they are
+ * passed (they make their own copies), EXCEPT that the png_set_text
+ * functions use the same storage passed to them in the text_ptr or
+ * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
+ * functions do not make their own copies.
+ */
+typedef struct png_info_struct
+{
+   /* the following are necessary for every PNG file */
+   png_uint_32 width;       /* width of image in pixels (from IHDR) */
+   png_uint_32 height;      /* height of image in pixels (from IHDR) */
+   png_uint_32 valid;       /* valid chunk data (see PNG_INFO_ below) */
+   png_uint_32 rowbytes;    /* bytes needed to hold an untransformed row */
+   png_colorp palette;      /* array of color values (valid & PNG_INFO_PLTE) */
+   png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */
+   png_uint_16 num_trans;   /* number of transparent palette color (tRNS) */
+   png_byte bit_depth;      /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */
+   png_byte color_type;     /* see PNG_COLOR_TYPE_ below (from IHDR) */
+   /* The following three should have been named *_method not *_type */
+   png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */
+   png_byte filter_type;    /* must be PNG_FILTER_TYPE_BASE (from IHDR) */
+   png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+
+   /* The following is informational only on read, and not used on writes. */
+   png_byte channels;       /* number of data channels per pixel (1, 2, 3, 4) */
+   png_byte pixel_depth;    /* number of bits per pixel */
+   png_byte spare_byte;     /* to align the data, and for future use */
+   png_byte signature[8];   /* magic bytes read by libpng from start of file */
+
+   /* The rest of the data is optional.  If you are reading, check the
+    * valid field to see if the information in these are valid.  If you
+    * are writing, set the valid field to those chunks you want written,
+    * and initialize the appropriate fields below.
+    */
+
+#if defined(PNG_gAMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   /* The gAMA chunk describes the gamma characteristics of the system
+    * on which the image was created, normally in the range [1.0, 2.5].
+    * Data is valid if (valid & PNG_INFO_gAMA) is non-zero.
+    */
+   float gamma; /* gamma value of image, if (valid & PNG_INFO_gAMA) */
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+    /* GR-P, 0.96a */
+    /* Data valid if (valid & PNG_INFO_sRGB) non-zero. */
+   png_byte srgb_intent; /* sRGB rendering intent [0, 1, 2, or 3] */
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+   /* The tEXt, zTXt, and iTXt chunks contain human-readable textual data in
+    * uncompressed, compressed, and optionally compressed forms, respectively.
+    * The data in "text" is an array of pointers to uncompressed,
+    * null-terminated C strings. Each chunk has a keyword that describes the
+    * textual data contained in that chunk.  Keywords are not required to be
+    * unique, and the text string may be empty.  Any number of text chunks may
+    * be in an image.
+    */
+   int num_text; /* number of comments read/to write */
+   int max_text; /* current size of text array */
+   png_textp text; /* array of comments read/to write */
+#endif /* PNG_TEXT_SUPPORTED */
+
+#if defined(PNG_tIME_SUPPORTED)
+   /* The tIME chunk holds the last time the displayed image data was
+    * modified.  See the png_time struct for the contents of this struct.
+    */
+   png_time mod_time;
+#endif
+
+#if defined(PNG_sBIT_SUPPORTED)
+   /* The sBIT chunk specifies the number of significant high-order bits
+    * in the pixel data.  Values are in the range [1, bit_depth], and are
+    * only specified for the channels in the pixel data.  The contents of
+    * the low-order bits is not specified.  Data is valid if
+    * (valid & PNG_INFO_sBIT) is non-zero.
+    */
+   png_color_8 sig_bit; /* significant bits in color channels */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
+defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The tRNS chunk supplies transparency data for paletted images and
+    * other image types that don't need a full alpha channel.  There are
+    * "num_trans" transparency values for a paletted image, stored in the
+    * same order as the palette colors, starting from index 0.  Values
+    * for the data are in the range [0, 255], ranging from fully transparent
+    * to fully opaque, respectively.  For non-paletted images, there is a
+    * single color specified that should be treated as fully transparent.
+    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
+    */
+   png_bytep trans; /* transparent values for paletted image */
+   png_color_16 trans_values; /* transparent color for non-palette image */
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The bKGD chunk gives the suggested image background color if the
+    * display program does not have its own background color and the image
+    * needs to be composited onto a background before display.  The colors
+    * in "background" are normally in the same color space/depth as the
+    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
+    */
+   png_color_16 background;
+#endif
+
+#if defined(PNG_oFFs_SUPPORTED)
+   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
+    * and downwards from the top-left corner of the display, page, or other
+    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
+    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
+    */
+   png_int_32 x_offset; /* x offset on page */
+   png_int_32 y_offset; /* y offset on page */
+   png_byte offset_unit_type; /* offset units type */
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+   /* The pHYs chunk gives the physical pixel density of the image for
+    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
+    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
+    */
+   png_uint_32 x_pixels_per_unit; /* horizontal pixel density */
+   png_uint_32 y_pixels_per_unit; /* vertical pixel density */
+   png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+   /* The hIST chunk contains the relative frequency or importance of the
+    * various palette entries, so that a viewer can intelligently select a
+    * reduced-color palette, if required.  Data is an array of "num_palette"
+    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
+    * is non-zero.
+    */
+   png_uint_16p hist;
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+   /* The cHRM chunk describes the CIE color characteristics of the monitor
+    * on which the PNG was created.  This data allows the viewer to do gamut
+    * mapping of the input image to ensure that the viewer sees the same
+    * colors in the image as the creator.  Values are in the range
+    * [0.0, 0.8].  Data valid if (valid & PNG_INFO_cHRM) non-zero.
+    */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float x_white;
+   float y_white;
+   float x_red;
+   float y_red;
+   float x_green;
+   float y_green;
+   float x_blue;
+   float y_blue;
+#endif
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+   /* The pCAL chunk describes a transformation between the stored pixel
+    * values and original physical data values used to create the image.
+    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
+    * range given by [pcal_X0, pcal_X1], and are further transformed by a
+    * (possibly non-linear) transformation function given by "pcal_type"
+    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
+    * defines below, and the PNG-Group's PNG extensions document for a
+    * complete description of the transformations and how they should be
+    * implemented, and for a description of the ASCII parameter strings.
+    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
+    */
+   png_charp pcal_purpose;  /* pCAL chunk description string */
+   png_int_32 pcal_X0;      /* minimum value */
+   png_int_32 pcal_X1;      /* maximum value */
+   png_charp pcal_units;    /* Latin-1 string giving physical units */
+   png_charpp pcal_params;  /* ASCII strings containing parameter values */
+   png_byte pcal_type;      /* equation type (see PNG_EQUATION_ below) */
+   png_byte pcal_nparams;   /* number of parameters given in pcal_params */
+#endif
+
+/* New members added in libpng-1.0.6 */
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_uint_32 free_me;     /* flags items libpng is responsible for freeing */
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   /* storage for unknown chunks that the library doesn't recognize. */
+   png_unknown_chunkp unknown_chunks;
+   png_size_t unknown_chunks_num;
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+   /* iCCP chunk data. */
+   png_charp iccp_name;     /* profile name */
+   png_charp iccp_profile;  /* International Color Consortium profile data */
+                            /* Note to maintainer: should be png_bytep */
+   png_uint_32 iccp_proflen;  /* ICC profile data length */
+   png_byte iccp_compression; /* Always zero */
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+   /* data on sPLT chunks (there may be more than one). */
+   png_sPLT_tp splt_palettes;
+   png_uint_32 splt_palettes_num;
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+   /* The sCAL chunk describes the actual physical dimensions of the
+    * subject matter of the graphic.  The chunk contains a unit specification
+    * (a byte value) and two ASCII strings representing floating-point
+    * values.  The values are width and height corresponding to one pixel
+    * in the image.  This external representation is converted to double
+    * here.  Data values are valid if (valid & PNG_INFO_sCAL) is non-zero.
+    */
+   png_byte scal_unit;         /* unit of physical scale */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double scal_pixel_width;    /* width of one pixel */
+   double scal_pixel_height;   /* height of one pixel */
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp scal_s_width;     /* string containing width */
+   png_charp scal_s_height;    /* string containing height */
+#endif
+#endif
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS) non-zero */
+   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
+   png_bytepp row_pointers;        /* the image bits */
+#endif
+
+#if defined(PNG_FIXED_POINT_SUPPORTED) && defined(PNG_gAMA_SUPPORTED)
+   png_fixed_point int_gamma; /* gamma of image, if (valid & PNG_INFO_gAMA) */
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED) && defined(PNG_FIXED_POINT_SUPPORTED)
+   png_fixed_point int_x_white;
+   png_fixed_point int_y_white;
+   png_fixed_point int_x_red;
+   png_fixed_point int_y_red;
+   png_fixed_point int_x_green;
+   png_fixed_point int_y_green;
+   png_fixed_point int_x_blue;
+   png_fixed_point int_y_blue;
+#endif
+
+} png_info;
+
+typedef png_info FAR * png_infop;
+typedef png_info FAR * FAR * png_infopp;
+
+/* Maximum positive integer used in PNG is (2^31)-1 */
+#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL) /* 2^31 - 1 */
+#define PNG_UINT_32_MAX ((png_uint_32)(-1)) /* -1 converted to png_uint_32 */
+#define PNG_SIZE_MAX ((png_size_t)(-1))     /* -1 converted to png_size_t */
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* PNG_MAX_UINT is deprecated; use PNG_UINT_31_MAX instead. */
+#define PNG_MAX_UINT PNG_UINT_31_MAX
+#endif
+
+/* These describe the color_type field in png_info. */
+/* color type masks: single-bit flags that are OR-ed together below */
+#define PNG_COLOR_MASK_PALETTE    1
+#define PNG_COLOR_MASK_COLOR      2
+#define PNG_COLOR_MASK_ALPHA      4
+
+/* color types, built from the masks.  Not all combinations are legal */
+#define PNG_COLOR_TYPE_GRAY 0 /* no mask bits set */
+#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
+#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
+#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
+#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
+/* shorter aliases for the two alpha color types */
+#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
+#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
+
+/* This is for compression type. PNG 1.0-1.2 define only this single type. */
+#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
+#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
+
+/* This is for filter type. PNG 1.0-1.2 define only this single type. */
+#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
+#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
+#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
+
+/* These are for the interlacing type.  These values should NOT be changed. */
+#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
+#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
+#define PNG_INTERLACE_LAST        2 /* Not a valid value */
+
+/* These are for the oFFs chunk.  These values should NOT be changed. */
+#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
+#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
+#define PNG_OFFSET_LAST           2 /* Not a valid value */
+
+/* These are for the pCAL chunk.  These values should NOT be changed. */
+#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
+#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
+#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
+#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
+#define PNG_EQUATION_LAST         4 /* Not a valid value */
+
+/* These are for the sCAL chunk.  These values should NOT be changed. */
+#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
+#define PNG_SCALE_METER           1 /* meters per pixel */
+#define PNG_SCALE_RADIAN          2 /* radians per pixel */
+#define PNG_SCALE_LAST            3 /* Not a valid value */
+
+/* These are for the pHYs chunk.  These values should NOT be changed. */
+#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
+#define PNG_RESOLUTION_METER      1 /* pixels/meter */
+#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
+
+/* These are for the sRGB chunk.  These values should NOT be changed. */
+#define PNG_sRGB_INTENT_PERCEPTUAL 0
+#define PNG_sRGB_INTENT_RELATIVE   1
+#define PNG_sRGB_INTENT_SATURATION 2
+#define PNG_sRGB_INTENT_ABSOLUTE   3
+#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
+
+/* Maximum keyword length for text chunks */
+#define PNG_KEYWORD_MAX_LENGTH     79
+
+/* Maximum number of entries in PLTE/sPLT/tRNS arrays */
+#define PNG_MAX_PALETTE_LENGTH    256
+
+/* Bit flags for the "valid" field of png_info.  A set bit means the chunk's
+ * data has been successfully read from the PNG header, or the application
+ * has filled in the corresponding data in the info_struct to be written
+ * into the output file.  The PNG_INFO_<chunk> values should NOT be changed.
+ */
+#define PNG_INFO_gAMA 0x0001
+#define PNG_INFO_sBIT 0x0002
+#define PNG_INFO_cHRM 0x0004
+#define PNG_INFO_PLTE 0x0008
+#define PNG_INFO_tRNS 0x0010
+#define PNG_INFO_bKGD 0x0020
+#define PNG_INFO_hIST 0x0040
+#define PNG_INFO_pHYs 0x0080
+#define PNG_INFO_oFFs 0x0100
+#define PNG_INFO_tIME 0x0200
+#define PNG_INFO_pCAL 0x0400
+#define PNG_INFO_sRGB 0x0800   /* GR-P, 0.96a */
+#define PNG_INFO_iCCP 0x1000   /* ESR, 1.0.6 */
+#define PNG_INFO_sPLT 0x2000   /* ESR, 1.0.6 */
+#define PNG_INFO_sCAL 0x4000   /* ESR, 1.0.6 */
+#define PNG_INFO_IDAT 0x8000L  /* ESR, 1.0.6 */
+
+/* This is used for the transformation routines, as some of them
+ * change these values for the row.  It also should enable using
+ * the routines for other purposes.
+ */
+typedef struct png_row_info_struct
+{
+   png_uint_32 width; /* width of row */
+   png_uint_32 rowbytes; /* number of bytes in row */
+   png_byte color_type; /* color type of row */
+   png_byte bit_depth; /* bit depth of row */
+   png_byte channels; /* number of channels (1, 2, 3, or 4) */
+   png_byte pixel_depth; /* bits per pixel (depth * channels) */
+} png_row_info;
+
+typedef png_row_info FAR * png_row_infop;        /* pointer to png_row_info */
+typedef png_row_info FAR * FAR * png_row_infopp; /* pointer to png_row_infop */
+
+/* These are the function types for the I/O functions and for the functions
+ * that allow the user to override the default I/O functions with their
+ * own.  The png_error_ptr type should match that of user-supplied warning
+ * and error functions, while the png_rw_ptr type should match that of the
+ * user read/write data functions.
+ */
+typedef struct png_struct_def png_struct; /* struct is defined further below */
+typedef png_struct FAR * png_structp;
+
+typedef void (PNGAPI *png_error_ptr) PNGARG((png_structp, png_const_charp));
+typedef void (PNGAPI *png_rw_ptr) PNGARG((png_structp, png_bytep, png_size_t));
+typedef void (PNGAPI *png_flush_ptr) PNGARG((png_structp));
+typedef void (PNGAPI *png_read_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+typedef void (PNGAPI *png_write_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+typedef void (PNGAPI *png_progressive_info_ptr) PNGARG((png_structp, png_infop));
+typedef void (PNGAPI *png_progressive_end_ptr) PNGARG((png_structp, png_infop));
+typedef void (PNGAPI *png_progressive_row_ptr) PNGARG((png_structp, png_bytep,
+   png_uint_32, int));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+typedef void (PNGAPI *png_user_transform_ptr) PNGARG((png_structp,
+    png_row_infop, png_bytep));
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+typedef int (PNGAPI *png_user_chunk_ptr) PNGARG((png_structp, png_unknown_chunkp));
+#endif
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+typedef void (PNGAPI *png_unknown_chunk_ptr) PNGARG((png_structp));
+#endif
+
+/* Transform masks for the high-level interface (one bit per transform) */
+#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
+#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
+#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
+#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
+#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
+#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
+#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
+#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
+#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
+#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
+#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
+#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
+#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* WRITE only */
+
+/* Flags for MNG supported features */
+#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
+#define PNG_FLAG_MNG_FILTER_64      0x04
+#define PNG_ALL_MNG_FEATURES        0x05 /* EMPTY_PLTE | FILTER_64 */
+/* Memory callback types; note that these are declared without PNGAPI. */
+typedef png_voidp (*png_malloc_ptr) PNGARG((png_structp, png_size_t));
+typedef void (*png_free_ptr) PNGARG((png_structp, png_voidp));
+
+/* The structure that holds the information to read and write PNG files.
+ * The only people who need to care about what is inside of this are the
+ * people who will be modifying the library for their own special needs.
+ * It should NOT be accessed directly by an application, except to store
+ * the jmp_buf.
+ */
+
+struct png_struct_def
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf jmpbuf;            /* used in png_error */
+#endif
+   png_error_ptr error_fn;    /* function for printing errors and aborting */
+   png_error_ptr warning_fn;  /* function for printing warnings */
+   png_voidp error_ptr;       /* user supplied struct for error functions */
+   png_rw_ptr write_data_fn;  /* function for writing output data */
+   png_rw_ptr read_data_fn;   /* function for reading input data */
+   png_voidp io_ptr;          /* ptr to application struct for I/O functions */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   png_user_transform_ptr read_user_transform_fn; /* user read transform */
+#endif
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_user_transform_ptr write_user_transform_fn; /* user write transform */
+#endif
+
+/* These were added in libpng-1.0.2 */
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_voidp user_transform_ptr; /* user supplied struct for user transform */
+   png_byte user_transform_depth;    /* bit depth of user transformed pixels */
+   png_byte user_transform_channels; /* channels in user transformed pixels */
+#endif
+#endif
+
+   png_uint_32 mode;          /* tells us where we are in the PNG file */
+   png_uint_32 flags;         /* flags indicating various things to libpng */
+   png_uint_32 transformations; /* which transformations to perform */
+
+   z_stream zstream;          /* pointer to decompression structure (below) */
+   png_bytep zbuf;            /* buffer for zlib */
+   png_size_t zbuf_size;      /* size of zbuf */
+   int zlib_level;            /* holds zlib compression level */
+   int zlib_method;           /* holds zlib compression method */
+   int zlib_window_bits;      /* holds zlib compression window bits */
+   int zlib_mem_level;        /* holds zlib compression memory level */
+   int zlib_strategy;         /* holds zlib compression strategy */
+
+   png_uint_32 width;         /* width of image in pixels */
+   png_uint_32 height;        /* height of image in pixels */
+   png_uint_32 num_rows;      /* number of rows in current pass */
+   png_uint_32 usr_width;     /* width of row at start of write */
+   png_uint_32 rowbytes;      /* size of row in bytes */
+   png_uint_32 irowbytes;     /* size of current interlaced row in bytes */
+   png_uint_32 iwidth;        /* width of current interlaced row in pixels */
+   png_uint_32 row_number;    /* current row in interlace pass */
+   png_bytep prev_row;        /* buffer to save previous (unfiltered) row */
+   png_bytep row_buf;         /* buffer to save current (unfiltered) row */
+#ifndef PNG_NO_WRITE_FILTERING
+   png_bytep sub_row;         /* buffer to save "sub" row when filtering */
+   png_bytep up_row;          /* buffer to save "up" row when filtering */
+   png_bytep avg_row;         /* buffer to save "avg" row when filtering */
+   png_bytep paeth_row;       /* buffer to save "Paeth" row when filtering */
+#endif
+   png_row_info row_info;     /* used for transformation routines */
+
+   png_uint_32 idat_size;     /* current IDAT size for read */
+   png_uint_32 crc;           /* current chunk CRC value */
+   png_colorp palette;        /* palette from the input file */
+   png_uint_16 num_palette;   /* number of color entries in palette */
+   png_uint_16 num_trans;     /* number of transparency values */
+   png_byte chunk_name[5];    /* null-terminated name of current chunk */
+   png_byte compression;      /* file compression type (always 0) */
+   png_byte filter;           /* file filter type (always 0) */
+   png_byte interlaced;       /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+   png_byte pass;             /* current interlace pass (0 - 6) */
+   png_byte do_filter;        /* row filter flags (see PNG_FILTER_ below ) */
+   png_byte color_type;       /* color type of file */
+   png_byte bit_depth;        /* bit depth of file */
+   png_byte usr_bit_depth;    /* bit depth of users row */
+   png_byte pixel_depth;      /* number of bits per pixel */
+   png_byte channels;         /* number of channels in file */
+   png_byte usr_channels;     /* channels at start of write */
+   png_byte sig_bytes;        /* magic bytes read/written from start of file */
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+#ifdef PNG_LEGACY_SUPPORTED
+   png_byte filler;           /* filler byte for pixel expansion */
+#else
+   png_uint_16 filler;           /* filler bytes for pixel expansion */
+#endif
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED)
+   png_byte background_gamma_type;
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+   float background_gamma;
+#  endif
+   png_color_16 background;   /* background color in screen gamma space */
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   png_color_16 background_1; /* background normalized to gamma 1.0 */
+#endif
+#endif /* PNG_bKGD_SUPPORTED */
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_flush_ptr output_flush_fn;/* Function for flushing output */
+   png_uint_32 flush_dist;    /* how many rows apart to flush, 0 - no flush */
+   png_uint_32 flush_rows;    /* number of rows written since last flush */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   int gamma_shift;      /* number of "insignificant" bits 16-bit gamma */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float gamma;          /* file gamma value */
+   float screen_gamma;   /* screen gamma value (display_exponent) */
+#endif
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep gamma_table;     /* gamma table for 8-bit depth files */
+   png_bytep gamma_from_1;    /* converts from 1.0 to screen */
+   png_bytep gamma_to_1;      /* converts from file to 1.0 */
+   png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */
+   png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */
+   png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
+   png_color_8 sig_bit;       /* significant bits in each available channel */
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+   png_color_8 shift;         /* shift for significant bit tranformation */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
+ || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep trans;           /* transparency values for paletted files */
+   png_color_16 trans_values; /* transparency values for non-paletted files */
+#endif
+
+   png_read_status_ptr read_row_fn;   /* called after each row is decoded */
+   png_write_status_ptr write_row_fn; /* called after each row is encoded */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_progressive_info_ptr info_fn; /* called after header data fully read */
+   png_progressive_row_ptr row_fn;   /* called after each prog. row is decoded */
+   png_progressive_end_ptr end_fn;   /* called after image is complete */
+   png_bytep save_buffer_ptr;        /* current location in save_buffer */
+   png_bytep save_buffer;            /* buffer for previously read data */
+   png_bytep current_buffer_ptr;     /* current location in current_buffer */
+   png_bytep current_buffer;         /* buffer for recently used data */
+   png_uint_32 push_length;          /* size of current input chunk */
+   png_uint_32 skip_length;          /* bytes to skip in input data */
+   png_size_t save_buffer_size;      /* amount of data now in save_buffer */
+   png_size_t save_buffer_max;       /* total size of save_buffer */
+   png_size_t buffer_size;           /* total amount of available input data */
+   png_size_t current_buffer_size;   /* amount of data now in current_buffer */
+   int process_mode;                 /* what push library is currently doing */
+   int cur_palette;                  /* current push library palette index */
+
+#  if defined(PNG_TEXT_SUPPORTED)
+     png_size_t current_text_size;   /* current size of text input data */
+     png_size_t current_text_left;   /* how much text left to read in input */
+     png_charp current_text;         /* current text chunk buffer */
+     png_charp current_text_ptr;     /* current location in current_text */
+#  endif /* PNG_TEXT_SUPPORTED */
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
+/* for the Borland special 64K segment handler */
+   png_bytepp offset_table_ptr;
+   png_bytep offset_table;
+   png_uint_16 offset_table_number;
+   png_uint_16 offset_table_count;
+   png_uint_16 offset_table_count_free;
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   png_bytep palette_lookup;         /* lookup table for dithering */
+   png_bytep dither_index;           /* index translation for palette files */
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED) || defined(PNG_hIST_SUPPORTED)
+   png_uint_16p hist;                /* histogram */
+#endif
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_byte heuristic_method;        /* heuristic for row filter selection */
+   png_byte num_prev_filters;        /* number of weights for previous rows */
+   png_bytep prev_filters;           /* filter type(s) of previous row(s) */
+   png_uint_16p filter_weights;      /* weight(s) for previous line(s) */
+   png_uint_16p inv_filter_weights;  /* 1/weight(s) for previous line(s) */
+   png_uint_16p filter_costs;        /* relative filter calculation cost */
+   png_uint_16p inv_filter_costs;    /* 1/relative filter calculation cost */
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_charp time_buffer;            /* String to hold RFC 1123 time text */
+#endif
+
+/* New members added in libpng-1.0.6 */
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_uint_32 free_me;       /* flags items libpng is responsible for freeing */
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+   png_voidp user_chunk_ptr;
+   png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   int num_chunk_list;
+   png_bytep chunk_list;
+#endif
+
+/* New members added in libpng-1.0.3 */
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   png_byte rgb_to_gray_status;
+   /* These were changed from png_byte in libpng-1.0.6 */
+   png_uint_16 rgb_to_gray_red_coeff;
+   png_uint_16 rgb_to_gray_green_coeff;
+   png_uint_16 rgb_to_gray_blue_coeff;
+#endif
+
+/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
+#if defined(PNG_MNG_FEATURES_SUPPORTED) || \
+    defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* changed from png_byte to png_uint_32 at version 1.2.0 */
+#ifdef PNG_1_0_X
+   png_byte mng_features_permitted;
+#else
+   png_uint_32 mng_features_permitted;
+#endif /* PNG_1_0_X */
+#endif
+
+/* New member added in libpng-1.0.7 */
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_fixed_point int_gamma;
+#endif
+
+/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_byte filter_type;
+#endif
+
+#if defined(PNG_1_0_X)
+/* New member added in libpng-1.0.10, ifdef'ed out in 1.2.0 */
+   png_uint_32 row_buf_size;
+#endif
+
+/* New members added in libpng-1.2.0 */
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#  if !defined(PNG_1_0_X)
+#    if defined(PNG_MMX_CODE_SUPPORTED)
+   png_byte     mmx_bitdepth_threshold;
+   png_uint_32  mmx_rowbytes_threshold;
+#    endif
+   png_uint_32  asm_flags;
+#  endif
+#endif
+
+/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_voidp mem_ptr;                /* user supplied struct for mem functions */
+   png_malloc_ptr malloc_fn;         /* function for allocating memory */
+   png_free_ptr free_fn;             /* function for freeing memory */
+#endif
+
+/* New member added in libpng-1.0.13 and 1.2.0 */
+   png_bytep big_row_buf;         /* buffer to save current (unfiltered) row */
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* The following three members were added at version 1.0.14 and 1.2.4 */
+   png_bytep dither_sort;            /* working sort array */
+   png_bytep index_to_palette;       /* where the original index currently is */
+                                     /* in the palette */
+   png_bytep palette_to_index;       /* which original index points to this */
+                                     /* palette color */
+#endif
+
+/* New members added in libpng-1.0.16 and 1.2.6 */
+   png_byte compression_type;
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_uint_32 user_width_max;
+   png_uint_32 user_height_max;
+#endif
+
+/* New member added in libpng-1.0.25 and 1.2.17 */
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   /* storage for unknown chunk that the library doesn't recognize. */
+   png_unknown_chunk unknown_chunk;
+#endif
+
+/* New members added in libpng-1.2.26 */
+  png_uint_32 old_big_row_buf_size, old_prev_row_size;
+};
+
+
+/* This triggers a compiler error in png.c, if png.c and png.h
+ * do not agree upon the version number.
+ */
+typedef png_structp version_1_2_29;
+
+typedef png_struct FAR * FAR * png_structpp;
+
+/* Here are the function definitions most commonly used.  This is not
+ * the place to find out how to use libpng.  See libpng.txt for the
+ * full explanation, see example.c for the summary.  This just provides
+ * a simple one line description of the use of each function.
+ */
+
+/* Returns the version number of the library */
+extern PNG_EXPORT(png_uint_32,png_access_version_number) PNGARG((void));
+
+/* Tell lib we have already handled the first <num_bytes> magic bytes.
+ * Handling more than 8 bytes from the beginning of the file is an error.
+ */
+extern PNG_EXPORT(void,png_set_sig_bytes) PNGARG((png_structp png_ptr,
+   int num_bytes));
+
+/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
+ * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
+ * signature, and non-zero otherwise.  Having num_to_check == 0 or
+ * start > 7 will always fail (i.e. return non-zero).
+ */
+extern PNG_EXPORT(int,png_sig_cmp) PNGARG((png_bytep sig, png_size_t start,
+   png_size_t num_to_check));
+
+/* Simple signature checking function.  Calling png_check_sig(sig, n) is
+ * equivalent to evaluating !png_sig_cmp(sig, 0, n).
+ */
+extern PNG_EXPORT(int,png_check_sig) PNGARG((png_bytep sig, int num));
+
+/* Allocate and initialize png_ptr struct for reading, and any other memory. */
+extern PNG_EXPORT(png_structp,png_create_read_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn));
+
+/* Allocate and initialize png_ptr struct for writing, and any other memory */
+extern PNG_EXPORT(png_structp,png_create_write_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn));
+
+#ifdef PNG_WRITE_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_compression_buffer_size)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+extern PNG_EXPORT(void,png_set_compression_buffer_size)
+   PNGARG((png_structp png_ptr, png_uint_32 size));
+#endif
+
+/* Reset the compression stream */
+extern PNG_EXPORT(int,png_reset_zstream) PNGARG((png_structp png_ptr));
+
+/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_structp,png_create_read_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+extern PNG_EXPORT(png_structp,png_create_write_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+#endif
+
+/* Write a PNG chunk - size, type, (optional) data, CRC. */
+extern PNG_EXPORT(void,png_write_chunk) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_bytep data, png_size_t length));
+
+/* Write the start of a PNG chunk - length and chunk name. */
+extern PNG_EXPORT(void,png_write_chunk_start) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_uint_32 length));
+
+/* Write the data of a PNG chunk started with png_write_chunk_start(). */
+extern PNG_EXPORT(void,png_write_chunk_data) PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+/* Finish a chunk started with png_write_chunk_start() (includes CRC). */
+extern PNG_EXPORT(void,png_write_chunk_end) PNGARG((png_structp png_ptr));
+
+/* Allocate and initialize the info structure */
+extern PNG_EXPORT(png_infop,png_create_info_struct)
+   PNGARG((png_structp png_ptr));
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize the info structure (old interface - DEPRECATED).  The macro
+ * expands to a bare expression, so the caller writes the semicolon. */
+extern PNG_EXPORT(void,png_info_init) PNGARG((png_infop info_ptr));
+#undef png_info_init
+#define png_info_init(info_ptr) png_info_init_3(&(info_ptr), png_sizeof(png_info))
+#endif
+
+extern PNG_EXPORT(void,png_info_init_3) PNGARG((png_infopp info_ptr,
+    png_size_t png_info_struct_size));
+
+/* Writes all the PNG information before the image. */
+extern PNG_EXPORT(void,png_write_info_before_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+extern PNG_EXPORT(void,png_write_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the information before the actual image data. */
+extern PNG_EXPORT(void,png_read_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+extern PNG_EXPORT(png_charp,png_convert_to_rfc1123)
+   PNGARG((png_structp png_ptr, png_timep ptime));
+#endif
+
+#if !defined(_WIN32_WCE)
+/* "time.h" functions are not supported on WindowsCE */
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+/* convert from a struct tm to png_time */
+extern PNG_EXPORT(void,png_convert_from_struct_tm) PNGARG((png_timep ptime,
+   struct tm FAR * ttime));
+
+/* convert from time_t to png_time.  Uses gmtime() */
+extern PNG_EXPORT(void,png_convert_from_time_t) PNGARG((png_timep ptime,
+   time_t ttime));
+#endif /* PNG_WRITE_tIME_SUPPORTED */
+#endif /* _WIN32_WCE */
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
+extern PNG_EXPORT(void,png_set_expand) PNGARG((png_structp png_ptr));
+#if !defined(PNG_1_0_X)
+extern PNG_EXPORT(void,png_set_expand_gray_1_2_4_to_8) PNGARG((png_structp
+  png_ptr));
+#endif
+extern PNG_EXPORT(void,png_set_palette_to_rgb) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(void,png_set_tRNS_to_alpha) PNGARG((png_structp png_ptr));
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated */
+extern PNG_EXPORT(void,png_set_gray_1_2_4_to_8) PNGARG((png_structp png_ptr));
+#endif
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Use blue, green, red order for pixels. */
+extern PNG_EXPORT(void,png_set_bgr) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+/* Expand the grayscale to 24-bit RGB if necessary. */
+extern PNG_EXPORT(void,png_set_gray_to_rgb) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+/* Reduce RGB to grayscale. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_rgb_to_gray) PNGARG((png_structp png_ptr,
+   int error_action, double red, double green ));
+#endif
+extern PNG_EXPORT(void,png_set_rgb_to_gray_fixed) PNGARG((png_structp png_ptr,
+   int error_action, png_fixed_point red, png_fixed_point green ));
+extern PNG_EXPORT(png_byte,png_get_rgb_to_gray_status) PNGARG((png_structp
+   png_ptr));
+#endif
+
+extern PNG_EXPORT(void,png_build_grayscale_palette) PNGARG((int bit_depth,
+   png_colorp palette));
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_strip_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_swap_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_invert_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte to 8-bit Gray or 24-bit RGB images. */
+extern PNG_EXPORT(void,png_set_filler) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+/* The values of the PNG_FILLER_ defines should NOT be changed */
+#define PNG_FILLER_BEFORE 0
+#define PNG_FILLER_AFTER 1
+/* Add an alpha byte to 8-bit Gray or 24-bit RGB images. */
+#if !defined(PNG_1_0_X)
+extern PNG_EXPORT(void,png_set_add_alpha) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+#endif
+#endif /* PNG_READ_FILLER_SUPPORTED || PNG_WRITE_FILLER_SUPPORTED */
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Swap bytes in 16-bit depth files. */
+extern PNG_EXPORT(void,png_set_swap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
+extern PNG_EXPORT(void,png_set_packing) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Swap packing order of pixels in bytes. */
+extern PNG_EXPORT(void,png_set_packswap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Converts files to legal bit depths. */
+extern PNG_EXPORT(void,png_set_shift) PNGARG((png_structp png_ptr,
+   png_color_8p true_bits));
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Have the code handle the interlacing.  Returns the number of passes. */
+extern PNG_EXPORT(int,png_set_interlace_handling) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+/* Invert monochrome files */
+extern PNG_EXPORT(void,png_set_invert_mono) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+/* Handle alpha and tRNS by replacing with a background color. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_background) PNGARG((png_structp png_ptr,
+   png_color_16p background_color, int background_gamma_code,
+   int need_expand, double background_gamma));
+#endif
+#define PNG_BACKGROUND_GAMMA_UNKNOWN 0
+#define PNG_BACKGROUND_GAMMA_SCREEN  1
+#define PNG_BACKGROUND_GAMMA_FILE    2
+#define PNG_BACKGROUND_GAMMA_UNIQUE  3
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* strip the second byte of information from a 16-bit depth file. */
+extern PNG_EXPORT(void,png_set_strip_16) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* Turn on dithering, and reduce the palette to the number of colors available. */
+extern PNG_EXPORT(void,png_set_dither) PNGARG((png_structp png_ptr,
+   png_colorp palette, int num_palette, int maximum_colors,
+   png_uint_16p histogram, int full_dither));
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+/* Handle gamma correction. Screen_gamma=(display_exponent) */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gamma) PNGARG((png_structp png_ptr,
+   double screen_gamma, double default_file_gamma));
+#endif
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+#if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* Permit or disallow empty PLTE (0: not permitted, 1: permitted) */
+/* Deprecated and will be removed.  Use png_permit_mng_features() instead. */
+extern PNG_EXPORT(void,png_permit_empty_plte) PNGARG((png_structp png_ptr,
+   int empty_plte_permitted));
+#endif
+#endif
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+/* Set how many lines between output flushes - 0 for no flushing */
+extern PNG_EXPORT(void,png_set_flush) PNGARG((png_structp png_ptr, int nrows));
+/* Flush the current PNG output buffer */
+extern PNG_EXPORT(void,png_write_flush) PNGARG((png_structp png_ptr));
+#endif
+
+/* optional call to update the palette with the requested transformations */
+extern PNG_EXPORT(void,png_start_read_image) PNGARG((png_structp png_ptr));
+
+/* optional call to update the user's info structure */
+extern PNG_EXPORT(void,png_read_update_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read one or more rows of image data. */
+extern PNG_EXPORT(void,png_read_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_bytepp display_row, png_uint_32 num_rows));
+#endif
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read a row of data. */
+extern PNG_EXPORT(void,png_read_row) PNGARG((png_structp png_ptr,
+   png_bytep row,
+   png_bytep display_row));
+#endif
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the whole image into memory at once. */
+extern PNG_EXPORT(void,png_read_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+#endif
+
+/* write a row of image data */
+extern PNG_EXPORT(void,png_write_row) PNGARG((png_structp png_ptr,
+   png_bytep row));
+
+/* write a few rows of image data */
+extern PNG_EXPORT(void,png_write_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_uint_32 num_rows));
+
+/* write the image data */
+extern PNG_EXPORT(void,png_write_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+
+/* writes the end of the PNG file. */
+extern PNG_EXPORT(void,png_write_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the end of the PNG file. */
+extern PNG_EXPORT(void,png_read_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+/* free any memory associated with the png_info_struct */
+extern PNG_EXPORT(void,png_destroy_info_struct) PNGARG((png_structp png_ptr,
+   png_infopp info_ptr_ptr));
+
+/* free any memory associated with the png_struct and the png_info_structs */
+extern PNG_EXPORT(void,png_destroy_read_struct) PNGARG((png_structpp
+   png_ptr_ptr, png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
+
+/* free all memory used by the read (old method - NOT DLL EXPORTED) */
+extern void png_read_destroy PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_infop end_info_ptr));
+
+/* free any memory associated with the png_struct and the png_info_structs */
+extern PNG_EXPORT(void,png_destroy_write_struct)
+   PNGARG((png_structpp png_ptr_ptr, png_infopp info_ptr_ptr));
+
+/* free any memory used in png_ptr struct (old method - NOT DLL EXPORTED) */
+extern void png_write_destroy PNGARG((png_structp png_ptr));
+
+/* set the libpng method of handling chunk CRC errors */
+extern PNG_EXPORT(void,png_set_crc_action) PNGARG((png_structp png_ptr,
+   int crit_action, int ancil_action));
+
+/* Values for png_set_crc_action() to say how to handle CRC errors in
+ * ancillary and critical chunks, and whether to use the data contained
+ * therein.  Note that it is impossible to "discard" data in a critical
+ * chunk.  For versions prior to 0.90, the action was always error/quit,
+ * whereas in version 0.90 and later, the action for CRC errors in ancillary
+ * chunks is warn/discard.  These values should NOT be changed.
+ *
+ *      value                       action:critical     action:ancillary
+ */
+#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
+#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
+#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
+#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
+#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
+#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
+
+/* These functions give the user control over the scan-line filtering in
+ * libpng and the compression methods used by zlib.  These functions are
+ * mainly useful for testing, as the defaults should work with most users.
+ * Those users who are tight on memory or want faster performance at the
+ * expense of compression can modify them.  See the compression library
+ * header file (zlib.h) for an explanation of the compression functions.
+ */
+
+/* set the filtering method(s) used by libpng.  Currently, the only valid
+ * value for "method" is 0.
+ */
+extern PNG_EXPORT(void,png_set_filter) PNGARG((png_structp png_ptr, int method,
+   int filters));
+
+/* Flags for png_set_filter() to say which filters to use.  The flags
+ * are chosen so that they don't conflict with real filter types
+ * below, in case they are supplied instead of the #defined constants.
+ * These values should NOT be changed.
+ */
+#define PNG_NO_FILTERS     0x00
+#define PNG_FILTER_NONE    0x08
+#define PNG_FILTER_SUB     0x10
+#define PNG_FILTER_UP      0x20
+#define PNG_FILTER_AVG     0x40
+#define PNG_FILTER_PAETH   0x80
+#define PNG_ALL_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP | \
+                         PNG_FILTER_AVG | PNG_FILTER_PAETH)
+
+/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
+ * These defines should NOT be changed.
+ */
+#define PNG_FILTER_VALUE_NONE  0
+#define PNG_FILTER_VALUE_SUB   1
+#define PNG_FILTER_VALUE_UP    2
+#define PNG_FILTER_VALUE_AVG   3
+#define PNG_FILTER_VALUE_PAETH 4
+#define PNG_FILTER_VALUE_LAST  5
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED) /* EXPERIMENTAL */
+/* The "heuristic_method" is given by one of the PNG_FILTER_HEURISTIC_
+ * defines, either the default (minimum-sum-of-absolute-differences), or
+ * the experimental method (weighted-minimum-sum-of-absolute-differences).
+ *
+ * Weights are factors >= 1.0, indicating how important it is to keep the
+ * filter type consistent between rows.  Larger numbers mean the current
+ * filter is that many times as likely to be the same as the "num_weights"
+ * previous filters.  This is cumulative for each previous row with a weight.
+ * There needs to be "num_weights" values in "filter_weights", or it can be
+ * NULL if the weights aren't being specified.  Weights have no influence on
+ * the selection of the first row filter.  Well chosen weights can (in theory)
+ * improve the compression for a given image.
+ *
+ * Costs are factors >= 1.0 indicating the relative decoding costs of a
+ * filter type.  Higher costs indicate more decoding expense, and are
+ * therefore less likely to be selected over a filter with lower computational
+ * costs.  There needs to be a value in "filter_costs" for each valid filter
+ * type (given by PNG_FILTER_VALUE_LAST), or it can be NULL if you aren't
+ * setting the costs.  Costs try to improve the speed of decompression without
+ * unduly increasing the compressed image size.
+ *
+ * A negative weight or cost indicates the default value is to be used, and
+ * values in the range [0.0, 1.0) indicate the value is to remain unchanged.
+ * The default values for both weights and costs are currently 1.0, but may
+ * change if good general weighting/cost heuristics can be found.  If both
+ * the weights and costs are set to 1.0, this degenerates the WEIGHTED method
+ * to the UNWEIGHTED method, but with added encoding time/computation.
+ */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_filter_heuristics) PNGARG((png_structp png_ptr,
+   int heuristic_method, int num_weights, png_doublep filter_weights,
+   png_doublep filter_costs));
+#endif
+#endif /*  PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
+
+/* Heuristic used for row filter selection.  These defines should NOT be
+ * changed.
+ */
+#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
+#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
+#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
+#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
+
+/* Set the library compression level.  Currently, valid values range from
+ * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
+ * (0 - no compression, 9 - "maximal" compression).  Note that tests have
+ * shown that zlib compression levels 3-6 usually perform as well as level 9
+ * for PNG images, and do considerably fewer calculations.  In the future,
+ * these values may not correspond directly to the zlib compression levels.
+ */
+extern PNG_EXPORT(void,png_set_compression_level) PNGARG((png_structp png_ptr,
+   int level));
+
+extern PNG_EXPORT(void,png_set_compression_mem_level)
+   PNGARG((png_structp png_ptr, int mem_level));
+
+extern PNG_EXPORT(void,png_set_compression_strategy)
+   PNGARG((png_structp png_ptr, int strategy));
+
+extern PNG_EXPORT(void,png_set_compression_window_bits)
+   PNGARG((png_structp png_ptr, int window_bits));
+
+extern PNG_EXPORT(void,png_set_compression_method) PNGARG((png_structp png_ptr,
+   int method));
+
+/* These next functions are called for input/output, memory, and error
+ * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
+ * and call standard C I/O routines such as fread(), fwrite(), and
+ * fprintf().  These functions can be made to use other I/O routines
+ * at run time for those applications that need to handle I/O in a
+ * different manner by calling png_set_???_fn().  See libpng.txt for
+ * more information.
+ */
+
+#if !defined(PNG_NO_STDIO)
+/* Initialize the input/output for the PNG file to the default functions. */
+extern PNG_EXPORT(void,png_init_io) PNGARG((png_structp png_ptr, png_FILE_p fp));
+#endif
+
+/* Replace the (error and abort), and warning functions with user
+ * supplied functions.  If no messages are to be printed you must still
+ * write and use replacement functions. The replacement error_fn should
+ * still do a longjmp to the last setjmp location if you are using this
+ * method of error handling.  If error_fn or warning_fn is NULL, the
+ * default function will be used.
+ */
+
+extern PNG_EXPORT(void,png_set_error_fn) PNGARG((png_structp png_ptr,
+   png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn));
+
+/* Return the user pointer associated with the error functions */
+extern PNG_EXPORT(png_voidp,png_get_error_ptr) PNGARG((png_structp png_ptr));
+
+/* Replace the default data output functions with a user supplied one(s).
+ * If buffered output is not used, then output_flush_fn can be set to NULL.
+ * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
+ * output_flush_fn will be ignored (and thus can be NULL).
+ */
+extern PNG_EXPORT(void,png_set_write_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
+
+/* Replace the default data input function with a user supplied one. */
+extern PNG_EXPORT(void,png_set_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr read_data_fn));
+
+/* Return the user pointer associated with the I/O functions */
+extern PNG_EXPORT(png_voidp,png_get_io_ptr) PNGARG((png_structp png_ptr));
+
+extern PNG_EXPORT(void,png_set_read_status_fn) PNGARG((png_structp png_ptr,
+   png_read_status_ptr read_row_fn));
+
+extern PNG_EXPORT(void,png_set_write_status_fn) PNGARG((png_structp png_ptr,
+   png_write_status_ptr write_row_fn));
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* Replace the default memory allocation functions with user supplied one(s). */
+extern PNG_EXPORT(void,png_set_mem_fn) PNGARG((png_structp png_ptr,
+   png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+/* Return the user pointer associated with the memory functions */
+extern PNG_EXPORT(png_voidp,png_get_mem_ptr) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_read_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr read_user_transform_fn));
+#endif
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_write_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr write_user_transform_fn));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_user_transform_info) PNGARG((png_structp
+   png_ptr, png_voidp user_transform_ptr, int user_transform_depth,
+   int user_transform_channels));
+/* Return the user pointer associated with the user transform functions */
+extern PNG_EXPORT(png_voidp,png_get_user_transform_ptr)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+extern PNG_EXPORT(void,png_set_read_user_chunk_fn) PNGARG((png_structp png_ptr,
+   png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
+extern PNG_EXPORT(png_voidp,png_get_user_chunk_ptr) PNGARG((png_structp
+   png_ptr));
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+/* Sets the function callbacks for the push reader, and a pointer to a
+ * user-defined structure available to the callback functions.
+ */
+extern PNG_EXPORT(void,png_set_progressive_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp progressive_ptr,
+   png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
+   png_progressive_end_ptr end_fn));
+
+/* returns the user pointer associated with the push read functions */
+extern PNG_EXPORT(png_voidp,png_get_progressive_ptr)
+   PNGARG((png_structp png_ptr));
+
+/* function to be called when data becomes available */
+extern PNG_EXPORT(void,png_process_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep buffer, png_size_t buffer_size));
+
+/* function that combines rows.  Not very different from the
+ * png_combine_row() call.  Is this even used?
+ */
+extern PNG_EXPORT(void,png_progressive_combine_row) PNGARG((png_structp png_ptr,
+   png_bytep old_row, png_bytep new_row));
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+extern PNG_EXPORT(png_voidp,png_malloc) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+
+#if defined(PNG_1_0_X)
+#  define png_malloc_warn png_malloc
+#else
+/* Added at libpng version 1.2.4 */
+extern PNG_EXPORT(png_voidp,png_malloc_warn) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+#endif
+
+/* frees a pointer allocated by png_malloc() */
+extern PNG_EXPORT(void,png_free) PNGARG((png_structp png_ptr, png_voidp ptr));
+
+#if defined(PNG_1_0_X)
+/* Function to allocate memory for zlib. */
+extern PNG_EXPORT(voidpf,png_zalloc) PNGARG((voidpf png_ptr, uInt items,
+   uInt size));
+
+/* Function to free memory for zlib */
+extern PNG_EXPORT(void,png_zfree) PNGARG((voidpf png_ptr, voidpf ptr));
+#endif
+
+/* Free data that was allocated internally */
+extern PNG_EXPORT(void,png_free_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 free_me, int num));
+#ifdef PNG_FREE_ME_SUPPORTED
+/* Reassign responsibility for freeing existing data, whether allocated
+ * by libpng or by the application */
+extern PNG_EXPORT(void,png_data_freer) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int freer, png_uint_32 mask));
+#endif
+/* assignments for png_data_freer */
+#define PNG_DESTROY_WILL_FREE_DATA 1
+#define PNG_SET_WILL_FREE_DATA 1
+#define PNG_USER_WILL_FREE_DATA 2
+/* Flags for png_ptr->free_me and info_ptr->free_me */
+#define PNG_FREE_HIST 0x0008
+#define PNG_FREE_ICCP 0x0010
+#define PNG_FREE_SPLT 0x0020
+#define PNG_FREE_ROWS 0x0040
+#define PNG_FREE_PCAL 0x0080
+#define PNG_FREE_SCAL 0x0100
+#define PNG_FREE_UNKN 0x0200
+#define PNG_FREE_LIST 0x0400
+#define PNG_FREE_PLTE 0x1000
+#define PNG_FREE_TRNS 0x2000
+#define PNG_FREE_TEXT 0x4000
+#define PNG_FREE_ALL  0x7fff
+#define PNG_FREE_MUL  0x4220 /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
+
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_voidp,png_malloc_default) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+extern PNG_EXPORT(void,png_free_default) PNGARG((png_structp png_ptr,
+   png_voidp ptr));
+#endif
+
+extern PNG_EXPORT(png_voidp,png_memcpy_check) PNGARG((png_structp png_ptr,
+   png_voidp s1, png_voidp s2, png_uint_32 size));
+
+extern PNG_EXPORT(png_voidp,png_memset_check) PNGARG((png_structp png_ptr,
+   png_voidp s1, int value, png_uint_32 size));
+
+#if defined(USE_FAR_KEYWORD)  /* memory model conversion function */
+extern void *png_far_to_near PNGARG((png_structp png_ptr,png_voidp ptr,
+   int check));
+#endif /* USE_FAR_KEYWORD */
+
+#ifndef PNG_NO_ERROR_TEXT
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message));
+
+/* The same, but the chunk name is prepended to the error string. */
+extern PNG_EXPORT(void,png_chunk_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message));
+#else
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_err) PNGARG((png_structp png_ptr));
+#endif
+
+#ifndef PNG_NO_WARNINGS
+/* Non-fatal error in libpng.  Can continue, but may have a problem. */
+extern PNG_EXPORT(void,png_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+#ifdef PNG_READ_SUPPORTED
+/* Non-fatal error in libpng, chunk name is prepended to message. */
+extern PNG_EXPORT(void,png_chunk_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_NO_WARNINGS */
+
+/* The png_set_<chunk> functions are for storing values in the png_info_struct.
+ * Similarly, the png_get_<chunk> calls are used to read values from the
+ * png_info_struct, either storing the parameters in the passed variables, or
+ * setting pointers into the png_info_struct where the data is stored.  The
+ * png_get_<chunk> functions return a non-zero value if the data was available
+ * in info_ptr, or return zero and do not change any of the parameters if the
+ * data was not available.
+ *
+ * These functions should be used instead of directly accessing png_info
+ * to avoid problems with future changes in the size and internal layout of
+ * png_info_struct.
+ */
+/* Returns "flag" if chunk data is valid in info_ptr. */
+extern PNG_EXPORT(png_uint_32,png_get_valid) PNGARG((png_structp png_ptr,
+png_infop info_ptr, png_uint_32 flag));
+
+/* Returns number of bytes needed to hold a transformed row. */
+extern PNG_EXPORT(png_uint_32,png_get_rowbytes) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* Returns row_pointers, which is an array of pointers to scanlines that was
+returned from png_read_png(). */
+extern PNG_EXPORT(png_bytepp,png_get_rows) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+/* Set row_pointers, which is an array of pointers to scanlines for use
+by png_write_png(). */
+extern PNG_EXPORT(void,png_set_rows) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytepp row_pointers));
+#endif
+
+/* Returns number of color channels in image. */
+extern PNG_EXPORT(png_byte,png_get_channels) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* Returns image width in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_width) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image height in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_height) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image bit_depth. */
+extern PNG_EXPORT(png_byte, png_get_bit_depth) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image color_type. */
+extern PNG_EXPORT(png_byte, png_get_color_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image filter_type. */
+extern PNG_EXPORT(png_byte, png_get_filter_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image interlace_type. */
+extern PNG_EXPORT(png_byte, png_get_interlace_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image compression_type. */
+extern PNG_EXPORT(png_byte, png_get_compression_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image resolution in pixels per meter, from pHYs chunk data. */
+extern PNG_EXPORT(png_uint_32, png_get_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_x_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_y_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns pixel aspect ratio, computed from pHYs chunk data.  */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(float, png_get_pixel_aspect_ratio) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+#endif
+
+/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
+extern PNG_EXPORT(png_int_32, png_get_x_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_x_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+#endif /* PNG_EASY_ACCESS_SUPPORTED */
+
+/* Returns pointer to signature string read from PNG header */
+extern PNG_EXPORT(png_bytep,png_get_signature) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_bKGD_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p *background));
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED)
+extern PNG_EXPORT(void,png_set_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p background));
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *white_x, double *white_y, double *red_x,
+   double *red_y, double *green_x, double *green_y, double *blue_x,
+   double *blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_white_x, png_fixed_point
+   *int_white_y, png_fixed_point *int_red_x, png_fixed_point *int_red_y,
+   png_fixed_point *int_green_x, png_fixed_point *int_green_y, png_fixed_point
+   *int_blue_x, png_fixed_point *int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double white_x, double white_y, double red_x,
+   double red_y, double green_x, double green_y, double blue_x, double blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *file_gamma));
+#endif
+extern PNG_EXPORT(png_uint_32,png_get_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_file_gamma));
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double file_gamma));
+#endif
+extern PNG_EXPORT(void,png_set_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_file_gamma));
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p *hist));
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+extern PNG_EXPORT(void,png_set_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p hist));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *width, png_uint_32 *height,
+   int *bit_depth, int *color_type, int *interlace_method,
+   int *compression_method, int *filter_method));
+
+extern PNG_EXPORT(void,png_set_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth,
+   int color_type, int interlace_method, int compression_method,
+   int filter_method));
+
+#if defined(PNG_oFFs_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 *offset_x, png_int_32 *offset_y,
+   int *unit_type));
+#endif
+
+#if defined(PNG_oFFs_SUPPORTED)
+extern PNG_EXPORT(void,png_set_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 offset_x, png_int_32 offset_y,
+   int unit_type));
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp *purpose, png_int_32 *X0, png_int_32 *X1,
+   int *type, int *nparams, png_charp *units, png_charpp *params));
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+extern PNG_EXPORT(void,png_set_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp purpose, png_int_32 X0, png_int_32 X1,
+   int type, int nparams, png_charp units, png_charpp params));
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+extern PNG_EXPORT(void,png_set_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp *palette, int *num_palette));
+
+extern PNG_EXPORT(void,png_set_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp palette, int num_palette));
+
+#if defined(PNG_sBIT_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p *sig_bit));
+#endif
+
+#if defined(PNG_sBIT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p sig_bit));
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *intent));
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+extern PNG_EXPORT(void,png_set_sRGB_gAMA_and_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charpp name, int *compression_type,
+   png_charpp profile, png_uint_32 *proflen));
+   /* Note to maintainer: profile should be png_bytepp */
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+extern PNG_EXPORT(void,png_set_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp name, int compression_type,
+   png_charp profile, png_uint_32 proflen));
+   /* Note to maintainer: profile should be png_bytep */
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tpp entries));
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tp entries, int nentries));
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* png_get_text also returns the number of text chunks in *num_text */
+extern PNG_EXPORT(png_uint_32,png_get_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp *text_ptr, int *num_text));
+#endif
+
+/*
+ *  Note while png_set_text() will accept a structure whose text,
+ *  language, and  translated keywords are NULL pointers, the structure
+ *  returned by png_get_text will always contain regular
+ *  zero-terminated C strings.  They might be empty strings but
+ *  they will never be NULL pointers.
+ */
+
+#if defined(PNG_TEXT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep *mod_time));
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+extern PNG_EXPORT(void,png_set_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep mod_time));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep *trans, int *num_trans,
+   png_color_16p *trans_values));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+extern PNG_EXPORT(void,png_set_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep trans, int num_trans,
+   png_color_16p trans_values));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, double *width, double *height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, png_charpp swidth, png_charpp sheight));
+#endif
+#endif
+#endif /* PNG_sCAL_SUPPORTED */
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, double width, double height));
+#else /* !PNG_FLOATING_POINT_SUPPORTED */
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, png_charp swidth, png_charp sheight));
+#endif /* PNG_FIXED_POINT_SUPPORTED */
+#endif /* PNG_FLOATING_POINT_SUPPORTED */
+#endif /* PNG_sCAL_SUPPORTED */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* provide a list of chunks and how they are to be handled, if the built-in
+   handling or default unknown chunk handling is not desired.  Any chunks not
+   listed will be handled in the default manner.  The IHDR and IEND chunks
+   must not be listed.
+      keep = 0: follow default behaviour
+           = 1: do not keep
+           = 2: keep only if safe-to-copy
+           = 3: keep even if unsafe-to-copy
+*/
+extern PNG_EXPORT(void, png_set_keep_unknown_chunks) PNGARG((png_structp
+   png_ptr, int keep, png_bytep chunk_list, int num_chunks));
+extern PNG_EXPORT(void, png_set_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns));
+extern PNG_EXPORT(void, png_set_unknown_chunk_location)
+   PNGARG((png_structp png_ptr, png_infop info_ptr, int chunk, int location));
+extern PNG_EXPORT(png_uint_32,png_get_unknown_chunks) PNGARG((png_structp
+   png_ptr, png_infop info_ptr, png_unknown_chunkpp entries));
+#endif
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+PNG_EXPORT(int,png_handle_as_unknown) PNGARG((png_structp png_ptr, png_bytep
+   chunk_name));
+#endif
+
+/* png_free_data() will turn off the "valid" flag for anything it frees.
+   If you need to turn it off for a chunk that your application has freed,
+   you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK); */
+extern PNG_EXPORT(void, png_set_invalid) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int mask));
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* The "params" pointer is currently not used and is for future expansion. */
+extern PNG_EXPORT(void, png_read_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+extern PNG_EXPORT(void, png_write_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+#endif
+
+/* Define PNG_DEBUG at compile time for debugging information.  Higher
+ * numbers for PNG_DEBUG mean more debugging information.  This has
+ * only been added since version 0.95 so it is not implemented throughout
+ * libpng yet, but more support will be added as needed.
+ */
+#ifdef PNG_DEBUG
+#if (PNG_DEBUG > 0)
+#if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER)
+#include <crtdbg.h>
+#if (PNG_DEBUG > 1)
+#define png_debug(l,m)  _RPT0(_CRT_WARN,m)
+#define png_debug1(l,m,p1)  _RPT1(_CRT_WARN,m,p1)
+#define png_debug2(l,m,p1,p2) _RPT2(_CRT_WARN,m,p1,p2)
+#endif
+#else /* PNG_DEBUG_FILE || !_MSC_VER */
+#ifndef PNG_DEBUG_FILE
+#define PNG_DEBUG_FILE stderr
+#endif /* PNG_DEBUG_FILE */
+#if (PNG_DEBUG > 1)
+#define png_debug(l,m) \
+{ \
+     int num_tabs=l; \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":"")))); \
+}
+#define png_debug1(l,m,p1) \
+{ \
+     int num_tabs=l; \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1); \
+}
+#define png_debug2(l,m,p1,p2) \
+{ \
+     int num_tabs=l; \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1,p2); \
+}
+#endif /* (PNG_DEBUG > 1) */
+#endif /* _MSC_VER */
+#endif /* (PNG_DEBUG > 0) */
+#endif /* PNG_DEBUG */
+#ifndef png_debug
+#define png_debug(l, m)
+#endif
+#ifndef png_debug1
+#define png_debug1(l, m, p1)
+#endif
+#ifndef png_debug2
+#define png_debug2(l, m, p1, p2)
+#endif
+
+extern PNG_EXPORT(png_charp,png_get_copyright) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_ver) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_version) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr));
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_permit_mng_features) PNGARG((png_structp
+   png_ptr, png_uint_32 mng_features_permitted));
+#endif
+
+/* "keep" values for png_set_keep_unknown_chunks(), added to version 1.2.6 */
+#define PNG_HANDLE_CHUNK_AS_DEFAULT   0 /* follow default behaviour */
+#define PNG_HANDLE_CHUNK_NEVER        1 /* do not keep */
+#define PNG_HANDLE_CHUNK_IF_SAFE      2 /* keep only if safe-to-copy */
+#define PNG_HANDLE_CHUNK_ALWAYS       3 /* keep even if unsafe-to-copy */
+
+/* Assembler/MMX flag bits and their accessors.  Added to version 1.2.0 */
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if defined(PNG_MMX_CODE_SUPPORTED)
+#define PNG_ASM_FLAG_MMX_SUPPORT_COMPILED  0x01  /* not user-settable */
+#define PNG_ASM_FLAG_MMX_SUPPORT_IN_CPU    0x02  /* not user-settable */
+#define PNG_ASM_FLAG_MMX_READ_COMBINE_ROW  0x04
+#define PNG_ASM_FLAG_MMX_READ_INTERLACE    0x08
+#define PNG_ASM_FLAG_MMX_READ_FILTER_SUB   0x10
+#define PNG_ASM_FLAG_MMX_READ_FILTER_UP    0x20
+#define PNG_ASM_FLAG_MMX_READ_FILTER_AVG   0x40
+#define PNG_ASM_FLAG_MMX_READ_FILTER_PAETH 0x80
+#define PNG_ASM_FLAGS_INITIALIZED          0x80000000  /* not user-settable */
+
+#define PNG_MMX_READ_FLAGS ( PNG_ASM_FLAG_MMX_READ_COMBINE_ROW  \
+                           | PNG_ASM_FLAG_MMX_READ_INTERLACE    \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_SUB   \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_UP    \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_AVG   \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_PAETH )
+#define PNG_MMX_WRITE_FLAGS ( 0 )
+
+#define PNG_MMX_FLAGS ( PNG_ASM_FLAG_MMX_SUPPORT_COMPILED \
+                      | PNG_ASM_FLAG_MMX_SUPPORT_IN_CPU   \
+                      | PNG_MMX_READ_FLAGS                \
+                      | PNG_MMX_WRITE_FLAGS )
+
+#define PNG_SELECT_READ   1
+#define PNG_SELECT_WRITE  2
+#endif /* PNG_MMX_CODE_SUPPORTED */
+
+#if !defined(PNG_1_0_X)
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_mmx_flagmask)
+   PNGARG((int flag_select, int *compilerID));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_asm_flagmask)
+   PNGARG((int flag_select));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_asm_flags)
+   PNGARG((png_structp png_ptr));
+
+/* pngget.c */
+extern PNG_EXPORT(png_byte,png_get_mmx_bitdepth_threshold)
+   PNGARG((png_structp png_ptr));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_mmx_rowbytes_threshold)
+   PNGARG((png_structp png_ptr));
+
+/* pngset.c */
+extern PNG_EXPORT(void,png_set_asm_flags)
+   PNGARG((png_structp png_ptr, png_uint_32 asm_flags));
+
+/* pngset.c */
+extern PNG_EXPORT(void,png_set_mmx_thresholds)
+   PNGARG((png_structp png_ptr, png_byte mmx_bitdepth_threshold,
+   png_uint_32 mmx_rowbytes_threshold));
+
+#endif /* !PNG_1_0_X */
+
+#if !defined(PNG_1_0_X)
+/* png.c, pnggccrd.c, or pngvcrd.c */
+extern PNG_EXPORT(int,png_mmx_support) PNGARG((void));
+#endif /* !PNG_1_0_X (PNG_ASSEMBLER_CODE_SUPPORTED is still open here) */
+
+/* Strip the prepended error numbers ("#nnn ") from error and warning
+ * messages before passing them to the error or warning handler. */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+extern PNG_EXPORT(void,png_set_strip_error_numbers) PNGARG((png_structp
+   png_ptr, png_uint_32 strip_mode));
+#endif /* PNG_ERROR_NUMBERS_SUPPORTED */
+
+#endif /* PNG_ASSEMBLER_CODE_SUPPORTED; NOTE(review): also gates the decl above - intended? */
+
+/* Added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+extern PNG_EXPORT(void,png_set_user_limits) PNGARG((png_structp
+   png_ptr, png_uint_32 user_width_max, png_uint_32 user_height_max));
+extern PNG_EXPORT(png_uint_32,png_get_user_width_max) PNGARG((png_structp
+   png_ptr));
+extern PNG_EXPORT(png_uint_32,png_get_user_height_max) PNGARG((png_structp
+   png_ptr));
+#endif
+
+/* Maintainer: Put new public prototypes here ^, in libpng.3, and project defs */
+
+#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
+/* With these routines we avoid an integer divide, which will be slower on
+ * most machines.  However, it does take more operations than the corresponding
+ * divide method, so it may be slower on a few RISC systems.  There are two
+ * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
+ *
+ * Note that the rounding factors are NOT supposed to be the same!  128 and
+ * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
+ * standard method.
+ *
+ * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
+ */
+
+ /* fg and bg should be in `gamma 1.0' space; alpha is the opacity          */
+
+#  define png_composite(composite, fg, alpha, bg)                            \
+     { png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) * (png_uint_16)(alpha) \
+                        +        (png_uint_16)(bg)*(png_uint_16)(255 -       \
+                        (png_uint_16)(alpha)) + (png_uint_16)128);           \
+       (composite) = (png_byte)((temp + (temp >> 8)) >> 8); }
+
+#  define png_composite_16(composite, fg, alpha, bg)                         \
+     { png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) * (png_uint_32)(alpha) \
+                        + (png_uint_32)(bg)*(png_uint_32)(65535L -           \
+                        (png_uint_32)(alpha)) + (png_uint_32)32768L);        \
+       (composite) = (png_uint_16)((temp + (temp >> 16)) >> 16); }
+
+#else  /* standard method using integer division */
+
+#  define png_composite(composite, fg, alpha, bg)                            \
+     (composite) = (png_byte)(((png_uint_16)(fg) * (png_uint_16)(alpha) +    \
+       (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) +       \
+       (png_uint_16)127) / 255)
+
+#  define png_composite_16(composite, fg, alpha, bg)                         \
+     (composite) = (png_uint_16)(((png_uint_32)(fg) * (png_uint_32)(alpha) + \
+       (png_uint_32)(bg)*(png_uint_32)(65535L - (png_uint_32)(alpha)) +      \
+       (png_uint_32)32767) / (png_uint_32)65535L)
+
+#endif /* PNG_READ_COMPOSITE_NODIV_SUPPORTED */
+
+/* Inline macros to do direct reads of bytes from the input buffer.  These
+ * require that you are using an architecture that uses PNG byte ordering
+ * (MSB first) and supports unaligned data storage.  I think that PowerPC
+ * in big-endian mode and 680x0 are the only ones that will support this.
+ * The x86 line of processors definitely do not.  The png_get_int_32()
+ * routine also assumes we are using two's complement format for negative
+ * values, which is almost certainly true.
+ */
+#if defined(PNG_READ_BIG_ENDIAN_SUPPORTED)
+#  define png_get_uint_32(buf) ( *((png_uint_32p) (buf)))
+#  define png_get_uint_16(buf) ( *((png_uint_16p) (buf)))
+#  define png_get_int_32(buf)  ( *((png_int_32p)  (buf)))
+#else
+extern PNG_EXPORT(png_uint_32,png_get_uint_32) PNGARG((png_bytep buf));
+extern PNG_EXPORT(png_uint_16,png_get_uint_16) PNGARG((png_bytep buf));
+extern PNG_EXPORT(png_int_32,png_get_int_32) PNGARG((png_bytep buf));
+#endif /* !PNG_READ_BIG_ENDIAN_SUPPORTED */
+extern PNG_EXPORT(png_uint_32,png_get_uint_31)
+  PNGARG((png_structp png_ptr, png_bytep buf));
+/* No png_get_int_16 -- may be added if there's a real need for it. */
+
+/* Place a 32-bit number into a buffer in PNG byte order (big-endian).
+ */
+extern PNG_EXPORT(void,png_save_uint_32)
+   PNGARG((png_bytep buf, png_uint_32 i));
+extern PNG_EXPORT(void,png_save_int_32)
+   PNGARG((png_bytep buf, png_int_32 i));
+
+/* Place a 16-bit number into a buffer in PNG byte order.
+ * The parameter is declared unsigned int, not png_uint_16,
+ * just to avoid potential problems on pre-ANSI C compilers.
+ */
+extern PNG_EXPORT(void,png_save_uint_16)
+   PNGARG((png_bytep buf, unsigned int i));
+/* No png_save_int_16 -- may be added if there's a real need for it. */
+
+/* ************************************************************************* */
+
+/* These next functions are used internally in the code.  They generally
+ * shouldn't be used unless you are writing code to add or replace some
+ * functionality in libpng.  More information about most functions can
+ * be found in the files where the functions are located.
+ */
+
+
+/* Various modes of operation, that are visible to applications because
+ * they are used for unknown chunk location.
+ */
+#define PNG_HAVE_IHDR               0x01
+#define PNG_HAVE_PLTE               0x02
+#define PNG_HAVE_IDAT               0x04
+#define PNG_AFTER_IDAT              0x08 /* Have complete zlib datastream */
+#define PNG_HAVE_IEND               0x10
+
+#if defined(PNG_INTERNAL)
+
+/* More modes of operation.  Note that after an init, mode is set to
+ * zero automatically when the structure is created.
+ */
+#define PNG_HAVE_gAMA               0x20
+#define PNG_HAVE_cHRM               0x40
+#define PNG_HAVE_sRGB               0x80
+#define PNG_HAVE_CHUNK_HEADER      0x100
+#define PNG_WROTE_tIME             0x200
+#define PNG_WROTE_INFO_BEFORE_PLTE 0x400
+#define PNG_BACKGROUND_IS_GRAY     0x800
+#define PNG_HAVE_PNG_SIGNATURE    0x1000
+#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000 /* Have another chunk after IDAT */
+
+/* flags for the transformations the PNG library does on the image data */
+#define PNG_BGR                0x0001
+#define PNG_INTERLACE          0x0002
+#define PNG_PACK               0x0004
+#define PNG_SHIFT              0x0008
+#define PNG_SWAP_BYTES         0x0010
+#define PNG_INVERT_MONO        0x0020
+#define PNG_DITHER             0x0040
+#define PNG_BACKGROUND         0x0080
+#define PNG_BACKGROUND_EXPAND  0x0100
+                          /*   0x0200 unused */
+#define PNG_16_TO_8            0x0400
+#define PNG_RGBA               0x0800
+#define PNG_EXPAND             0x1000
+#define PNG_GAMMA              0x2000
+#define PNG_GRAY_TO_RGB        0x4000
+#define PNG_FILLER             0x8000L
+#define PNG_PACKSWAP          0x10000L
+#define PNG_SWAP_ALPHA        0x20000L
+#define PNG_STRIP_ALPHA       0x40000L
+#define PNG_INVERT_ALPHA      0x80000L
+#define PNG_USER_TRANSFORM   0x100000L
+#define PNG_RGB_TO_GRAY_ERR  0x200000L
+#define PNG_RGB_TO_GRAY_WARN 0x400000L
+#define PNG_RGB_TO_GRAY      0x600000L  /* two bits, RGB_TO_GRAY_ERR|WARN */
+                       /*    0x800000L     Unused */
+#define PNG_ADD_ALPHA       0x1000000L  /* Added to libpng-1.2.7 */
+#define PNG_EXPAND_tRNS     0x2000000L  /* Added to libpng-1.2.9 */
+                       /*   0x4000000L  unused */
+                       /*   0x8000000L  unused */
+                       /*  0x10000000L  unused */
+                       /*  0x20000000L  unused */
+                       /*  0x40000000L  unused */
+
+/* flags for png_create_struct */
+#define PNG_STRUCT_PNG   0x0001
+#define PNG_STRUCT_INFO  0x0002
+
+/* Scaling factor for filter heuristic weighting calculations */
+#define PNG_WEIGHT_SHIFT 8
+#define PNG_WEIGHT_FACTOR (1<<(PNG_WEIGHT_SHIFT))
+#define PNG_COST_SHIFT 3
+#define PNG_COST_FACTOR (1<<(PNG_COST_SHIFT))
+
+/* flags for the png_ptr->flags rather than declaring a byte for each one */
+#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY     0x0001
+#define PNG_FLAG_ZLIB_CUSTOM_LEVEL        0x0002
+#define PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL    0x0004
+#define PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS  0x0008
+#define PNG_FLAG_ZLIB_CUSTOM_METHOD       0x0010
+#define PNG_FLAG_ZLIB_FINISHED            0x0020
+#define PNG_FLAG_ROW_INIT                 0x0040
+#define PNG_FLAG_FILLER_AFTER             0x0080
+#define PNG_FLAG_CRC_ANCILLARY_USE        0x0100
+#define PNG_FLAG_CRC_ANCILLARY_NOWARN     0x0200
+#define PNG_FLAG_CRC_CRITICAL_USE         0x0400
+#define PNG_FLAG_CRC_CRITICAL_IGNORE      0x0800
+#define PNG_FLAG_FREE_PLTE                0x1000
+#define PNG_FLAG_FREE_TRNS                0x2000
+#define PNG_FLAG_FREE_HIST                0x4000
+#define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000L
+#define PNG_FLAG_KEEP_UNSAFE_CHUNKS       0x10000L
+#define PNG_FLAG_LIBRARY_MISMATCH         0x20000L
+#define PNG_FLAG_STRIP_ERROR_NUMBERS      0x40000L
+#define PNG_FLAG_STRIP_ERROR_TEXT         0x80000L
+#define PNG_FLAG_MALLOC_NULL_MEM_OK       0x100000L
+#define PNG_FLAG_ADD_ALPHA                0x200000L  /* Added to libpng-1.2.8 */
+#define PNG_FLAG_STRIP_ALPHA              0x400000L  /* Added to libpng-1.2.8 */
+                                  /*      0x800000L  unused */
+                                  /*     0x1000000L  unused */
+                                  /*     0x2000000L  unused */
+                                  /*     0x4000000L  unused */
+                                  /*     0x8000000L  unused */
+                                  /*    0x10000000L  unused */
+                                  /*    0x20000000L  unused */
+                                  /*    0x40000000L  unused */
+
+#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
+                                     PNG_FLAG_CRC_ANCILLARY_NOWARN)
+
+#define PNG_FLAG_CRC_CRITICAL_MASK  (PNG_FLAG_CRC_CRITICAL_USE | \
+                                     PNG_FLAG_CRC_CRITICAL_IGNORE)
+
+#define PNG_FLAG_CRC_MASK           (PNG_FLAG_CRC_ANCILLARY_MASK | \
+                                     PNG_FLAG_CRC_CRITICAL_MASK)
+
+/* save typing and make code easier to understand */
+/* PNG_COLOR_DIST: sum of the absolute red, green and blue differences */
+#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \
+   abs((int)((c1).green) - (int)((c2).green)) + \
+   abs((int)((c1).blue) - (int)((c2).blue)))
+
+/* width*(pixel_bits>>3) if pixel_bits >= 8, else (width*pixel_bits+7)>>3.  Added to libpng-1.2.6 JB */
+#define PNG_ROWBYTES(pixel_bits, width) \
+    ((pixel_bits) >= 8 ? \
+    ((width) * (((png_uint_32)(pixel_bits)) >> 3)) : \
+    (( ((width) * ((png_uint_32)(pixel_bits))) + 7) >> 3) )
+
+/* PNG_OUT_OF_RANGE returns true if value is outside the range
+   ideal-delta..ideal+delta.  Each argument is evaluated twice.
+   "ideal" and "delta" should be constants, normally simple
+   integers, "value" a variable. Added to libpng-1.2.6 JB */
+#define PNG_OUT_OF_RANGE(value, ideal, delta) \
+        ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) )
+
+/* variables declared in png.c - only it needs to define PNG_NO_EXTERN */
+#if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
+/* place to hold the signature string for a PNG file. */
+#ifdef PNG_USE_GLOBAL_ARRAYS
+   PNG_EXPORT_VAR (PNG_CONST png_byte FARDATA) png_sig[8];
+#else
+#endif
+#endif /* PNG_NO_EXTERN */
+
+/* Constant strings for known chunk types.  If you need to add a chunk,
+ * define the name here, and add an invocation of the macro in png.c and
+ * wherever it's needed.  Each macro expands to the definition of a 5-byte
+ * array: the chunk name's four ASCII codes followed by '\0'. */
+#define PNG_IHDR png_byte png_IHDR[5] = { 73,  72,  68,  82, '\0'}
+#define PNG_IDAT png_byte png_IDAT[5] = { 73,  68,  65,  84, '\0'}
+#define PNG_IEND png_byte png_IEND[5] = { 73,  69,  78,  68, '\0'}
+#define PNG_PLTE png_byte png_PLTE[5] = { 80,  76,  84,  69, '\0'}
+#define PNG_bKGD png_byte png_bKGD[5] = { 98,  75,  71,  68, '\0'}
+#define PNG_cHRM png_byte png_cHRM[5] = { 99,  72,  82,  77, '\0'}
+#define PNG_gAMA png_byte png_gAMA[5] = {103,  65,  77,  65, '\0'}
+#define PNG_hIST png_byte png_hIST[5] = {104,  73,  83,  84, '\0'}
+#define PNG_iCCP png_byte png_iCCP[5] = {105,  67,  67,  80, '\0'}
+#define PNG_iTXt png_byte png_iTXt[5] = {105,  84,  88, 116, '\0'}
+#define PNG_oFFs png_byte png_oFFs[5] = {111,  70,  70, 115, '\0'}
+#define PNG_pCAL png_byte png_pCAL[5] = {112,  67,  65,  76, '\0'}
+#define PNG_sCAL png_byte png_sCAL[5] = {115,  67,  65,  76, '\0'}
+#define PNG_pHYs png_byte png_pHYs[5] = {112,  72,  89, 115, '\0'}
+#define PNG_sBIT png_byte png_sBIT[5] = {115,  66,  73,  84, '\0'}
+#define PNG_sPLT png_byte png_sPLT[5] = {115,  80,  76,  84, '\0'}
+#define PNG_sRGB png_byte png_sRGB[5] = {115,  82,  71,  66, '\0'}
+#define PNG_tEXt png_byte png_tEXt[5] = {116,  69,  88, 116, '\0'}
+#define PNG_tIME png_byte png_tIME[5] = {116,  73,  77,  69, '\0'}
+#define PNG_tRNS png_byte png_tRNS[5] = {116,  82,  78,  83, '\0'}
+#define PNG_zTXt png_byte png_zTXt[5] = {122,  84,  88, 116, '\0'}
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+PNG_EXPORT_VAR (png_byte FARDATA) png_IHDR[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IDAT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IEND[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_PLTE[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_bKGD[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_cHRM[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_gAMA[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_hIST[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iCCP[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iTXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_oFFs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pHYs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sBIT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sPLT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sRGB[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tEXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tIME[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tRNS[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_zTXt[5];
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize png_ptr struct for reading, and allocate any other memory.
+ * (old interface - DEPRECATED - use png_create_read_struct instead).
+ * Expands to one call expression: png_ptr must be an lvalue; add your own ';'. */
+extern PNG_EXPORT(void,png_read_init) PNGARG((png_structp png_ptr));
+#undef png_read_init
+#define png_read_init(png_ptr) png_read_init_3(&(png_ptr), \
+    PNG_LIBPNG_VER_STRING, png_sizeof(png_struct))
+#endif
+
+extern PNG_EXPORT(void,png_read_init_3) PNGARG((png_structpp ptr_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size));
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+extern PNG_EXPORT(void,png_read_init_2) PNGARG((png_structp png_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size, png_size_t
+    png_info_size));
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize png_ptr struct for writing, and allocate any other memory.
+ * (old interface - DEPRECATED - use png_create_write_struct instead).
+ * Expands to one call expression: png_ptr must be an lvalue; add your own ';'. */
+extern PNG_EXPORT(void,png_write_init) PNGARG((png_structp png_ptr));
+#undef png_write_init
+#define png_write_init(png_ptr) png_write_init_3(&(png_ptr), \
+    PNG_LIBPNG_VER_STRING, png_sizeof(png_struct))
+#endif
+
+extern PNG_EXPORT(void,png_write_init_3) PNGARG((png_structpp ptr_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size));
+extern PNG_EXPORT(void,png_write_init_2) PNGARG((png_structp png_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size, png_size_t
+    png_info_size));
+
+/* Allocate memory for an internal libpng struct */
+PNG_EXTERN png_voidp png_create_struct PNGARG((int type));
+
+/* Free memory from internal libpng struct */
+PNG_EXTERN void png_destroy_struct PNGARG((png_voidp struct_ptr));
+
+PNG_EXTERN png_voidp png_create_struct_2 PNGARG((int type, png_malloc_ptr
+  malloc_fn, png_voidp mem_ptr));
+PNG_EXTERN void png_destroy_struct_2 PNGARG((png_voidp struct_ptr,
+   png_free_ptr free_fn, png_voidp mem_ptr));
+
+/* Free any memory that info_ptr points to and reset struct. */
+PNG_EXTERN void png_info_destroy PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_1_0_X
+/* Function to allocate memory for zlib. */
+PNG_EXTERN voidpf png_zalloc PNGARG((voidpf png_ptr, uInt items, uInt size));
+
+/* Function to free memory for zlib */
+PNG_EXTERN void png_zfree PNGARG((voidpf png_ptr, voidpf ptr));
+
+#ifdef PNG_SIZE_T
+/* Function to convert the sizeof an item (a size_t) to a png_size_t value */
+   PNG_EXTERN png_size_t PNGAPI png_convert_size PNGARG((size_t size));
+#endif
+
+/* Next four functions are used internally as callbacks.  PNGAPI is required
+ * but not PNG_EXPORT.  PNGAPI added at libpng version 1.2.3. */
+
+PNG_EXTERN void PNGAPI png_default_read_data PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void PNGAPI png_push_fill_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t length));
+#endif
+
+PNG_EXTERN void PNGAPI png_default_write_data PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+#if !defined(PNG_NO_STDIO)
+PNG_EXTERN void PNGAPI png_default_flush PNGARG((png_structp png_ptr));
+#endif
+#endif
+#else /* PNG_1_0_X */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void png_push_fill_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t length));
+#endif
+#endif /* PNG_1_0_X */
+
+/* Reset the CRC variable */
+PNG_EXTERN void png_reset_crc PNGARG((png_structp png_ptr));
+
+/* Write the "data" buffer to whatever output you are using. */
+PNG_EXTERN void png_write_data PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+/* Read data from whatever input you are using into the "data" buffer */
+PNG_EXTERN void png_read_data PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+/* Read bytes into buf, and update png_ptr->crc */
+PNG_EXTERN void png_crc_read PNGARG((png_structp png_ptr, png_bytep buf,
+   png_size_t length));
+
+/* Decompress data in a chunk that uses compression */
+#if defined(PNG_zTXt_SUPPORTED) || defined(PNG_iTXt_SUPPORTED) || \
+    defined(PNG_iCCP_SUPPORTED) || defined(PNG_sPLT_SUPPORTED)
+PNG_EXTERN png_charp png_decompress_chunk PNGARG((png_structp png_ptr,
+   int comp_type, png_charp chunkdata, png_size_t chunklength,
+   png_size_t prefix_length, png_size_t *data_length));
+#endif
+
+/* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */
+PNG_EXTERN int png_crc_finish PNGARG((png_structp png_ptr, png_uint_32 skip));
+
+/* Read the CRC from the file and compare it to the libpng calculated CRC */
+PNG_EXTERN int png_crc_error PNGARG((png_structp png_ptr));
+
+/* Calculate the CRC over a section of data.  Note that we are only
+ * passing a maximum of 64K on systems that have this as a memory limit,
+ * since this is the maximum buffer size we can specify.
+ */
+PNG_EXTERN void png_calculate_crc PNGARG((png_structp png_ptr, png_bytep ptr,
+   png_size_t length));
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+PNG_EXTERN void png_flush PNGARG((png_structp png_ptr));
+#endif
+
+/* simple function to write the signature */
+PNG_EXTERN void png_write_sig PNGARG((png_structp png_ptr));
+
+/* write various chunks */
+
+/* Write the IHDR chunk, and update the png_struct with the necessary
+ * information.
+ */
+PNG_EXTERN void png_write_IHDR PNGARG((png_structp png_ptr, png_uint_32 width,
+   png_uint_32 height,
+   int bit_depth, int color_type, int compression_method, int filter_method,
+   int interlace_method));
+
+PNG_EXTERN void png_write_PLTE PNGARG((png_structp png_ptr, png_colorp palette,
+   png_uint_32 num_pal));
+
+PNG_EXTERN void png_write_IDAT PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+PNG_EXTERN void png_write_IEND PNGARG((png_structp png_ptr));
+
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+PNG_EXTERN void png_write_gAMA PNGARG((png_structp png_ptr, double file_gamma));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_gAMA_fixed PNGARG((png_structp png_ptr, png_fixed_point
+    file_gamma));
+#endif
+#endif
+
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+PNG_EXTERN void png_write_sBIT PNGARG((png_structp png_ptr, png_color_8p sbit,
+   int color_type));
+#endif
+
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+PNG_EXTERN void png_write_cHRM PNGARG((png_structp png_ptr,
+   double white_x, double white_y,
+   double red_x, double red_y, double green_x, double green_y,
+   double blue_x, double blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_cHRM_fixed PNGARG((png_structp png_ptr,
+   png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+PNG_EXTERN void png_write_sRGB PNGARG((png_structp png_ptr,
+   int intent));
+#endif
+
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+PNG_EXTERN void png_write_iCCP PNGARG((png_structp png_ptr,
+   png_charp name, int compression_type,
+   png_charp profile, int proflen));
+   /* Note to maintainer: profile should be png_bytep */
+#endif
+
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr,
+   png_sPLT_tp palette));
+#endif
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+PNG_EXTERN void png_write_tRNS PNGARG((png_structp png_ptr, png_bytep trans,
+   png_color_16p values, int number, int color_type));
+#endif
+
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+PNG_EXTERN void png_write_bKGD PNGARG((png_structp png_ptr,
+   png_color_16p values, int color_type));
+#endif
+
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+PNG_EXTERN void png_write_hIST PNGARG((png_structp png_ptr, png_uint_16p hist,
+   int num_hist));
+#endif
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
+    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+PNG_EXTERN png_size_t png_check_keyword PNGARG((png_structp png_ptr,
+   png_charp key, png_charpp new_key));
+#endif
+
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+PNG_EXTERN void png_write_tEXt PNGARG((png_structp png_ptr, png_charp key,
+   png_charp text, png_size_t text_len));
+#endif
+
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+PNG_EXTERN void png_write_zTXt PNGARG((png_structp png_ptr, png_charp key,
+   png_charp text, png_size_t text_len, int compression));
+#endif
+
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr,
+   int compression, png_charp key, png_charp lang, png_charp lang_key,
+   png_charp text));
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)  /* Added at version 1.0.14 and 1.2.4 */
+PNG_EXTERN int png_set_text_2 PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+PNG_EXTERN void png_write_oFFs PNGARG((png_structp png_ptr,
+   png_int_32 x_offset, png_int_32 y_offset, int unit_type));
+#endif
+
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+PNG_EXTERN void png_write_pCAL PNGARG((png_structp png_ptr, png_charp purpose,
+   png_int_32 X0, png_int_32 X1, int type, int nparams,
+   png_charp units, png_charpp params));
+#endif
+
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+PNG_EXTERN void png_write_pHYs PNGARG((png_structp png_ptr,
+   png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit,
+   int unit_type));
+#endif
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+PNG_EXTERN void png_write_tIME PNGARG((png_structp png_ptr,
+   png_timep mod_time));
+#endif
+
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+PNG_EXTERN void png_write_sCAL PNGARG((png_structp png_ptr,
+   int unit, double width, double height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_sCAL_s PNGARG((png_structp png_ptr,
+   int unit, png_charp width, png_charp height));
+#endif
+#endif
+#endif
+
+/* Called when finished processing a row of data */
+PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr));
+
+/* Internal use only.   Called before first row of data */
+PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr));
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_build_gamma_table PNGARG((png_structp png_ptr));
+#endif
+
+/* combine a row of data, dealing with alpha, etc. if requested */
+PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row,
+   int mask));
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+/* expand an interlaced row */
+/* OLD pre-1.0.9 interface:
+PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info,
+   png_bytep row, int pass, png_uint_32 transformations));
+ */
+PNG_EXTERN void png_do_read_interlace PNGARG((png_structp png_ptr));
+#endif
+
+/* GRR TO DO (2.0 or whenever):  simplify other internal calling interfaces */
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* grab pixels out of a row for an interlaced pass */
+PNG_EXTERN void png_do_write_interlace PNGARG((png_row_infop row_info,
+   png_bytep row, int pass));
+#endif
+
+/* unfilter a row */
+PNG_EXTERN void png_read_filter_row PNGARG((png_structp png_ptr,
+   png_row_infop row_info, png_bytep row, png_bytep prev_row, int filter));
+
+/* Choose the best filter to use and filter the row data */
+PNG_EXTERN void png_write_find_filter PNGARG((png_structp png_ptr,
+   png_row_infop row_info));
+
+/* Write out the filtered row. */
+PNG_EXTERN void png_write_filtered_row PNGARG((png_structp png_ptr,
+   png_bytep filtered_row));
+/* finish a row while reading, dealing with interlacing passes, etc. */
+PNG_EXTERN void png_read_finish_row PNGARG((png_structp png_ptr));
+
+/* initialize the row buffers, etc. */
+PNG_EXTERN void png_read_start_row PNGARG((png_structp png_ptr));
+/* optional call to update the users info structure */
+PNG_EXTERN void png_read_transform_info PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+/* these are the functions that do the transformations */
+#if defined(PNG_READ_FILLER_SUPPORTED)
+PNG_EXTERN void png_do_read_filler PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 filler, png_uint_32 flags));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_read_swap_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_write_swap_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_read_invert_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_write_invert_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
+    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_strip_filler PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 flags));
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+PNG_EXTERN void png_do_swap PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+PNG_EXTERN void png_do_packswap PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+PNG_EXTERN int png_do_rgb_to_gray PNGARG((png_structp png_ptr, png_row_infop
+   row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+PNG_EXTERN void png_do_gray_to_rgb PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+PNG_EXTERN void png_do_unpack PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+PNG_EXTERN void png_do_unshift PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_8p sig_bits));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+PNG_EXTERN void png_do_invert PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+PNG_EXTERN void png_do_chop PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+PNG_EXTERN void png_do_dither PNGARG((png_row_infop row_info,
+   png_bytep row, png_bytep palette_lookup, png_bytep dither_lookup));
+
+#  if defined(PNG_CORRECT_PALETTE_SUPPORTED)
+PNG_EXTERN void png_correct_palette PNGARG((png_structp png_ptr,
+   png_colorp palette, int num_palette));
+#  endif
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+PNG_EXTERN void png_do_bgr PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+PNG_EXTERN void png_do_pack PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 bit_depth));
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+PNG_EXTERN void png_do_shift PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_8p bit_depth));
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_do_background PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background,
+   png_color_16p background_1,
+   png_bytep gamma_table, png_bytep gamma_from_1, png_bytep gamma_to_1,
+   png_uint_16pp gamma_16, png_uint_16pp gamma_16_from_1,
+   png_uint_16pp gamma_16_to_1, int gamma_shift));
+#else
+PNG_EXTERN void png_do_background PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background));
+#endif
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_do_gamma PNGARG((png_row_infop row_info, png_bytep row,
+   png_bytep gamma_table, png_uint_16pp gamma_16_table,
+   int gamma_shift));
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+PNG_EXTERN void png_do_expand_palette PNGARG((png_row_infop row_info,
+   png_bytep row, png_colorp palette, png_bytep trans, int num_trans));
+PNG_EXTERN void png_do_expand PNGARG((png_row_infop row_info,
+   png_bytep row, png_color_16p trans_value));
+#endif
+
+/* The following decodes the appropriate chunks, and does error correction,
+ * then calls the appropriate callback for the chunk if it is valid.
+ */
+
+/* decode the IHDR chunk */
+PNG_EXTERN void png_handle_IHDR PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_handle_PLTE PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_handle_IEND PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+
+#if defined(PNG_READ_bKGD_SUPPORTED)
+PNG_EXTERN void png_handle_bKGD PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_cHRM_SUPPORTED)
+PNG_EXTERN void png_handle_cHRM PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_gAMA_SUPPORTED)
+PNG_EXTERN void png_handle_gAMA PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_hIST_SUPPORTED)
+PNG_EXTERN void png_handle_hIST PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_iCCP_SUPPORTED) /* iCCP chunk handler; PNG_EXTERN to match the sibling png_handle_* prototypes */
+PNG_EXTERN void png_handle_iCCP PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif /* PNG_READ_iCCP_SUPPORTED */
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+PNG_EXTERN void png_handle_iTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_oFFs_SUPPORTED)
+PNG_EXTERN void png_handle_oFFs PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_pCAL_SUPPORTED)
+PNG_EXTERN void png_handle_pCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_pHYs_SUPPORTED)
+PNG_EXTERN void png_handle_pHYs PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sBIT_SUPPORTED)
+PNG_EXTERN void png_handle_sBIT PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED)
+PNG_EXTERN void png_handle_sCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sPLT_SUPPORTED) /* sPLT chunk handler; PNG_EXTERN to match the sibling png_handle_* prototypes */
+PNG_EXTERN void png_handle_sPLT PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif /* PNG_READ_sPLT_SUPPORTED */
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+PNG_EXTERN void png_handle_sRGB PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+PNG_EXTERN void png_handle_tEXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tIME_SUPPORTED)
+PNG_EXTERN void png_handle_tIME PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+PNG_EXTERN void png_handle_tRNS PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+PNG_EXTERN void png_handle_zTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+PNG_EXTERN void png_handle_unknown PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+
+PNG_EXTERN void png_check_chunk_name PNGARG((png_structp png_ptr,
+   png_bytep chunk_name));
+
+/* handle the transformations for reading and writing */
+PNG_EXTERN void png_do_read_transformations PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_do_write_transformations PNGARG((png_structp png_ptr));
+
+PNG_EXTERN void png_init_read_transformations PNGARG((png_structp png_ptr));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void png_push_read_chunk PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_read_sig PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_check_crc PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_crc_skip PNGARG((png_structp png_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_push_crc_finish PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_save_buffer PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_restore_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t buffer_length));
+PNG_EXTERN void png_push_read_IDAT PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_process_IDAT_data PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t buffer_length));
+PNG_EXTERN void png_push_process_row PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_handle_unknown PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_have_info PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_have_end PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_have_row PNGARG((png_structp png_ptr, png_bytep row));
+PNG_EXTERN void png_push_read_end PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_process_some_data PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_read_push_finish_row PNGARG((png_structp png_ptr));
+#if defined(PNG_READ_tEXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_tEXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_tEXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_zTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_zTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_iTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_iTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+PNG_EXTERN void png_do_read_intrapixel PNGARG((png_row_infop row_info,
+   png_bytep row));
+PNG_EXTERN void png_do_write_intrapixel PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if defined(PNG_MMX_CODE_SUPPORTED)
+/* png.c */ /* PRIVATE */
+PNG_EXTERN void png_init_mmx_flags PNGARG((png_structp png_ptr));
+#endif
+#endif
+
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
+PNG_EXTERN png_uint_32 png_get_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN png_uint_32 png_get_x_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN png_uint_32 png_get_y_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN float png_get_x_offset_inches PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN float png_get_y_offset_inches PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_pHYs_SUPPORTED)
+PNG_EXTERN png_uint_32 png_get_pHYs_dpi PNGARG((png_structp png_ptr,
+png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif /* PNG_pHYs_SUPPORTED */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
+
+/* Maintainer: Put new private prototypes here ^ and in libpngpf.3 */
+
+#endif /* PNG_INTERNAL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PNG_VERSION_INFO_ONLY */
+/* do not put anything past this line */
+#endif /* PNG_H */
diff --git a/PNG/pngasmrd.h b/PNG/pngasmrd.h
new file mode 100644
index 0000000..d086d8c
--- /dev/null
+++ b/PNG/pngasmrd.h
@@ -0,0 +1,11 @@
+/* pngasmrd.h - assembler version of utilities to read a PNG file
+ *
+ * libpng 1.2.5 - October 3, 2002
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 2002 Glenn Randers-Pehrson
+ *
+ */
+
+/* This file is obsolete in libpng-1.0.9 and later; its contents now appear
+ * at the end of pngconf.h.
+ */
diff --git a/PNG/pngconf.h b/PNG/pngconf.h
new file mode 100644
index 0000000..d1e2995
--- /dev/null
+++ b/PNG/pngconf.h
@@ -0,0 +1,1481 @@
+
+/* pngconf.h - machine configurable file for libpng
+ *
+ * libpng version 1.2.29 - May 8, 2008
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+/* Any machine specific code is near the front of this file, so if you
+ * are configuring libpng for a machine, you may want to read the section
+ * starting here down to where it starts to typedef png_color, png_text,
+ * and png_info.
+ */
+
+#ifndef PNGCONF_H
+#define PNGCONF_H
+
+#define PNG_1_2_X
+
+/* 
+ * PNG_USER_CONFIG has to be defined on the compiler command line. This
+ * includes the resource compiler for Windows DLL configurations.
+ */
+#ifdef PNG_USER_CONFIG
+#  ifndef PNG_USER_PRIVATEBUILD
+#    define PNG_USER_PRIVATEBUILD
+#  endif
+#include "pngusr.h"
+#endif
+
+/* PNG_CONFIGURE_LIBPNG is set by the "configure" script. */
+#ifdef PNG_CONFIGURE_LIBPNG
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+/*
+ * Added at libpng-1.2.8
+ *  
+ * If you create a private DLL you need to define the following in "pngusr.h":
+ * #define PNG_USER_PRIVATEBUILD <Describes by whom and why this version of
+ *        the DLL was built>
+ *  e.g. #define PNG_USER_PRIVATEBUILD "Build by MyCompany for xyz reasons."
+ * #define PNG_USER_DLLFNAME_POSTFIX <two-letter postfix that serves to
+ *        distinguish your DLL from those of the official release. These
+ *        correspond to the trailing letters that come after the version
+ *        number and must match your private DLL name>
+ *  e.g. // private DLL "libpng13gx.dll"
+ *       #define PNG_USER_DLLFNAME_POSTFIX "gx"
+ * 
+ * The following macros are also at your disposal if you want to complete the 
+ * DLL VERSIONINFO structure.
+ * - PNG_USER_VERSIONINFO_COMMENTS
+ * - PNG_USER_VERSIONINFO_COMPANYNAME
+ * - PNG_USER_VERSIONINFO_LEGALTRADEMARKS
+ */
+
+#ifdef __STDC__
+#ifdef SPECIALBUILD
+#  pragma message("PNG_LIBPNG_SPECIALBUILD (and deprecated SPECIALBUILD)\
+ are now LIBPNG reserved macros. Use PNG_USER_PRIVATEBUILD instead.")
+#endif
+
+#ifdef PRIVATEBUILD
+# pragma message("PRIVATEBUILD is deprecated.\
+ Use PNG_USER_PRIVATEBUILD instead.")
+# define PNG_USER_PRIVATEBUILD PRIVATEBUILD
+#endif
+#endif /* __STDC__ */
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* End of material added to libpng-1.2.8 */
+
+/* Added at libpng-1.2.19, removed at libpng-1.2.20 because it caused trouble.
+   Restored at libpng-1.2.21. */
+#if !defined(PNG_NO_WARN_UNINITIALIZED_ROW) && \
+    !defined(PNG_WARN_UNINITIALIZED_ROW)
+#  define PNG_WARN_UNINITIALIZED_ROW 1
+#endif
+/* End of material added at libpng-1.2.19/1.2.21 */
+
+/* This is the size of the compression buffer, and thus the size of
+ * an IDAT chunk.  Make this whatever size you feel is best for your
+ * machine.  One of these will be allocated per png_struct.  When this
+ * is full, it writes the data to the disk, and does some other
+ * calculations.  Making this an extremely small size will slow
+ * the library down, but you may want to experiment to determine
+ * where it becomes significant, if you are concerned with memory
+ * usage.  Note that zlib allocates at least 32Kb also.  For readers,
+ * this describes the size of the buffer available to read the data in.
+ * Unless this gets smaller than the size of a row (compressed),
+ * it should not make much difference how big this is.
+ */
+
+#ifndef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 8192
+#endif
+
+/* Define PNG_NO_READ_SUPPORTED if you want a write-only libpng */
+
+#ifndef PNG_NO_READ_SUPPORTED
+#  define PNG_READ_SUPPORTED
+#endif
+
+/* Define PNG_NO_WRITE_SUPPORTED if you want a read-only libpng */
+
+#ifndef PNG_NO_WRITE_SUPPORTED
+#  define PNG_WRITE_SUPPORTED
+#endif
+
+/* Enabled by default in 1.2.0.  You can disable this if you don't need to
+   support PNGs that are embedded in MNG datastreams */
+#if !defined(PNG_1_0_X) && !defined(PNG_NO_MNG_FEATURES)
+#  ifndef PNG_MNG_FEATURES_SUPPORTED
+#    define PNG_MNG_FEATURES_SUPPORTED
+#  endif
+#endif
+
+#ifndef PNG_NO_FLOATING_POINT_SUPPORTED
+#  ifndef PNG_FLOATING_POINT_SUPPORTED
+#    define PNG_FLOATING_POINT_SUPPORTED
+#  endif
+#endif
+
+/* If you are running on a machine where you cannot allocate more
+ * than 64K of memory at once, uncomment this.  While libpng will not
+ * normally need that much memory in a chunk (unless you load up a very
+ * large file), zlib needs to know how big of a chunk it can use, and
+ * libpng thus makes sure to check any memory allocation to verify it
+ * will fit into memory.
+#define PNG_MAX_MALLOC_64K
+ */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+#  define PNG_MAX_MALLOC_64K
+#endif
+
+/* Special munging to support doing things the 'cygwin' way:
+ * 'Normal' png-on-win32 defines/defaults:
+ *   PNG_BUILD_DLL -- building dll
+ *   PNG_USE_DLL   -- building an application, linking to dll
+ *   (no define)   -- building static library, or building an
+ *                    application and linking to the static lib
+ * 'Cygwin' defines/defaults:
+ *   PNG_BUILD_DLL -- (ignored) building the dll
+ *   (no define)   -- (ignored) building an application, linking to the dll
+ *   PNG_STATIC    -- (ignored) building the static lib, or building an 
+ *                    application that links to the static lib.
+ *   ALL_STATIC    -- (ignored) building various static libs, or building an 
+ *                    application that links to the static libs.
+ * Thus,
+ * a cygwin user should define either PNG_BUILD_DLL or PNG_STATIC, and
+ * this bit of #ifdefs will define the 'correct' config variables based on
+ * that. If a cygwin user *wants* to define 'PNG_USE_DLL' that's okay, but
+ * unnecessary.
+ *
+ * Also, the precedence order is:
+ *   ALL_STATIC (since we can't #undef something outside our namespace)
+ *   PNG_BUILD_DLL
+ *   PNG_STATIC
+ *   (nothing) == PNG_USE_DLL
+ * 
+ * CYGWIN (2002-01-20): The preceding is now obsolete. With the advent
+ *   of auto-import in binutils, we no longer need to worry about 
+ *   __declspec(dllexport) / __declspec(dllimport) and friends.  Therefore,
+ *   we don't need to worry about PNG_STATIC or ALL_STATIC when it comes
+ *   to __declspec() stuff.  However, we DO need to worry about 
+ *   PNG_BUILD_DLL and PNG_STATIC because those change some defaults
+ *   such as CONSOLE_IO and whether GLOBAL_ARRAYS are allowed.
+ */
+#if defined(__CYGWIN__) /* normalise PNG_STATIC / PNG_DLL / PNG_USE_DLL / PNG_BUILD_DLL to one consistent set */
+#  if defined(ALL_STATIC) /* highest precedence: drop every DLL macro and force PNG_STATIC */
+#    if defined(PNG_BUILD_DLL)
+#      undef PNG_BUILD_DLL
+#    endif
+#    if defined(PNG_USE_DLL)
+#      undef PNG_USE_DLL
+#    endif
+#    if defined(PNG_DLL)
+#      undef PNG_DLL
+#    endif
+#    if !defined(PNG_STATIC)
+#      define PNG_STATIC
+#    endif
+#  else /* !ALL_STATIC */
+#    if defined (PNG_BUILD_DLL) /* drop PNG_STATIC and PNG_USE_DLL, ensure PNG_DLL */
+#      if defined(PNG_STATIC)
+#        undef PNG_STATIC
+#      endif
+#      if defined(PNG_USE_DLL)
+#        undef PNG_USE_DLL
+#      endif
+#      if !defined(PNG_DLL)
+#        define PNG_DLL
+#      endif
+#    else /* !PNG_BUILD_DLL */
+#      if defined(PNG_STATIC) /* static requested: drop PNG_USE_DLL and PNG_DLL */
+#        if defined(PNG_USE_DLL)
+#          undef PNG_USE_DLL
+#        endif
+#        if defined(PNG_DLL)
+#          undef PNG_DLL
+#        endif
+#      else /* nothing requested: default to PNG_USE_DLL and PNG_DLL */
+#        if !defined(PNG_USE_DLL)
+#          define PNG_USE_DLL
+#        endif
+#        if !defined(PNG_DLL)
+#          define PNG_DLL
+#        endif
+#      endif  /* PNG_STATIC */
+#    endif  /* PNG_BUILD_DLL */
+#  endif /* ALL_STATIC */
+#endif /* __CYGWIN__ */
+
+/* This protects us against compilers that run on a windowing system
+ * and thus don't have or would rather us not use the stdio types:
+ * stdin, stdout, and stderr.  The only one currently used is stderr
+ * in png_error() and png_warning().  #defining PNG_NO_CONSOLE_IO will
+ * prevent these from being compiled and used. #defining PNG_NO_STDIO
+ * will also prevent these, plus will prevent the entire set of stdio
+ * macros and functions (FILE *, printf, etc.) from being compiled and used,
+ * unless (PNG_DEBUG > 0) has been #defined.
+ *
+ * #define PNG_NO_CONSOLE_IO
+ * #define PNG_NO_STDIO
+ */
+
+#if defined(_WIN32_WCE)
+#  include <windows.h>
+   /* Console I/O functions are not supported on WindowsCE */
+#  define PNG_NO_CONSOLE_IO
+#  ifdef PNG_DEBUG
+#    undef PNG_DEBUG
+#  endif
+#endif
+
+#ifdef PNG_BUILD_DLL
+#  ifndef PNG_CONSOLE_IO_SUPPORTED
+#    ifndef PNG_NO_CONSOLE_IO
+#      define PNG_NO_CONSOLE_IO
+#    endif
+#  endif
+#endif
+
+#  ifdef PNG_NO_STDIO /* no stdio at all implies no console I/O either */
+#    ifndef PNG_NO_CONSOLE_IO
+#      define PNG_NO_CONSOLE_IO
+#    endif
+#    ifdef PNG_DEBUG /* <stdio.h> is still pulled in when PNG_DEBUG > 0 */
+#      if (PNG_DEBUG > 0)
+#        include <stdio.h>
+#      endif
+#    endif
+#  else /* !PNG_NO_STDIO: include <stdio.h> everywhere except WindowsCE */
+#    if !defined(_WIN32_WCE)
+/* "stdio.h" functions are not supported on WindowsCE */
+#      include <stdio.h>
+#    endif
+#  endif /* PNG_NO_STDIO */
+
+/* This macro protects us against machines that don't have function
+ * prototypes (ie K&R style headers).  If your compiler does not handle
+ * function prototypes, define this macro and use the included ansi2knr.
+ * I've always been able to use _NO_PROTO as the indicator, but you may
+ * need to drag the empty declaration out in front of here, or change the
+ * ifdef to suit your own needs.
+ */
+#ifndef PNGARG
+
+#ifdef OF /* zlib prototype munger */
+#  define PNGARG(arglist) OF(arglist)
+#else
+
+#ifdef _NO_PROTO
+#  define PNGARG(arglist) ()
+#  ifndef PNG_TYPECAST_NULL
+#     define PNG_TYPECAST_NULL
+#  endif
+#else
+#  define PNGARG(arglist) arglist
+#endif /* _NO_PROTO */
+
+
+#endif /* OF */
+
+#endif /* PNGARG */
+
+/* Try to determine if we are compiling on a Mac.  Note that testing for
+ * just __MWERKS__ is not good enough, because CodeWarrior is now used
+ * on non-Mac platforms.
+ */
+#ifndef MACOS
+#  if (defined(__MWERKS__) && defined(macintosh)) || defined(applec) || \
+      defined(THINK_C) || defined(__SC__) || defined(TARGET_OS_MAC)
+#    define MACOS
+#  endif
+#endif
+
+/* enough people need this for various reasons to include it here */
+#if !defined(MACOS) && !defined(RISCOS) && !defined(_WIN32_WCE)
+#  include <sys/types.h>
+#endif
+
+#if !defined(PNG_SETJMP_NOT_SUPPORTED) && !defined(PNG_NO_SETJMP_SUPPORTED)
+#  define PNG_SETJMP_SUPPORTED
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* This is an attempt to force a single setjmp behaviour on Linux.  If
+ * the X config stuff didn't define _BSD_SOURCE we wouldn't need this.
+ */
+
+#  ifdef __linux__
+#    ifdef _BSD_SOURCE
+#      define PNG_SAVE_BSD_SOURCE
+#      undef _BSD_SOURCE
+#    endif
+#    ifdef _SETJMP_H
+     /* If you encounter a compiler error here, see the explanation
+      * near the end of INSTALL.
+      */
+         __pngconf.h__ already includes setjmp.h;
+         __dont__ include it again.;
+#    endif
+#  endif /* __linux__ */
+
+   /* include setjmp.h for error handling */
+#  include <setjmp.h>
+
+#  ifdef __linux__
+#    ifdef PNG_SAVE_BSD_SOURCE
+#      ifndef _BSD_SOURCE
+#        define _BSD_SOURCE
+#      endif
+#      undef PNG_SAVE_BSD_SOURCE
+#    endif
+#  endif /* __linux__ */
+#endif /* PNG_SETJMP_SUPPORTED */
+
+#ifdef BSD
+#  include <strings.h>
+#else
+#  include <string.h>
+#endif
+
+/* Other defines for things like memory and the like can go here.  */
+#ifdef PNG_INTERNAL
+
+#include <stdlib.h>
+
+/* The functions exported by PNG_EXTERN are PNG_INTERNAL functions, which
+ * aren't usually used outside the library (as far as I know), so it is
+ * debatable if they should be exported at all.  In the future, when it is
+ * possible to have run-time registry of chunk-handling functions, some of
+ * these will be made available again.
+#define PNG_EXTERN extern
+ */
+#define PNG_EXTERN
+
+/* Other defines specific to compilers can go here.  Try to keep
+ * them inside an appropriate ifdef/endif pair for portability.
+ */
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED)
+#  if defined(MACOS)
+     /* We need to check that <math.h> hasn't already been included earlier
+      * as it seems it doesn't agree with <fp.h>, yet we should really use
+      * <fp.h> if possible.
+      */
+#    if !defined(__MATH_H__) && !defined(__MATH_H) && !defined(__cmath__)
+#      include <fp.h>
+#    endif
+#  else
+#    include <math.h>
+#  endif
+#  if defined(_AMIGA) && defined(__SASC) && defined(_M68881)
+     /* Amiga SAS/C: We must include builtin FPU functions when compiling using
+      * MATH=68881
+      */
+#    include <m68881.h>
+#  endif
+#endif
+
+/* Codewarrior on NT has linking problems without this. */
+#if (defined(__MWERKS__) && defined(WIN32)) || defined(__STDC__)
+#  define PNG_ALWAYS_EXTERN
+#endif
+
+/* This provides the non-ANSI (far) memory allocation routines. */
+#if defined(__TURBOC__) && defined(__MSDOS__)
+#  include <mem.h>
+#  include <alloc.h>
+#endif
+
+/* I have no idea why this is necessary... */
+#if defined(_MSC_VER) && (defined(WIN32) || defined(_Windows) || \
+    defined(_WINDOWS) || defined(_WIN32) || defined(__WIN32__))
+#  include <malloc.h>
+#endif
+
+/* This controls how fine the dithering gets.  As this allocates
+ * a largish chunk of memory (32K), those who are not as concerned
+ * with dithering quality can decrease some or all of these.
+ */
+#ifndef PNG_DITHER_RED_BITS
+#  define PNG_DITHER_RED_BITS 5
+#endif
+#ifndef PNG_DITHER_GREEN_BITS
+#  define PNG_DITHER_GREEN_BITS 5
+#endif
+#ifndef PNG_DITHER_BLUE_BITS
+#  define PNG_DITHER_BLUE_BITS 5
+#endif
+
+/* This controls how fine the gamma correction becomes when you
+ * are only interested in 8 bits anyway.  Increasing this value
+ * results in more memory being used, and more pow() functions
+ * being called to fill in the gamma tables.  Don't set this value
+ * less than 8, and even that may not work (I haven't tested it).
+ */
+
+#ifndef PNG_MAX_GAMMA_8
+#  define PNG_MAX_GAMMA_8 11
+#endif
+
+/* This controls how much a difference in gamma we can tolerate before
+ * we actually start doing gamma conversion.
+ */
+#ifndef PNG_GAMMA_THRESHOLD
+#  define PNG_GAMMA_THRESHOLD 0.05
+#endif
+
+#endif /* PNG_INTERNAL */
+
+/* The following uses const char * instead of char * for error
+ * and warning message functions, so some compilers won't complain.
+ * If you do not want to use const, define PNG_NO_CONST here.
+ */
+
+#ifndef PNG_NO_CONST
+#  define PNG_CONST const
+#else
+#  define PNG_CONST
+#endif
+
+/* The following defines give you the ability to remove code from the
+ * library that you will not be using.  I wish I could figure out how to
+ * automate this, but I can't do that without making it seriously hard
+ * on the users.  So if you are not using an ability, change the #define
+ * to an #undef, and that part of the library will not be compiled.  If
+ * your linker can't find a function, you may want to make sure the
+ * ability is defined here.  Some of these depend upon some others being
+ * defined.  I haven't figured out all the interactions here, so you may
+ * have to experiment awhile to get everything to compile.  If you are
+ * creating or using a shared library, you probably shouldn't touch this,
+ * as it will affect the size of the structures, and this will cause bad
+ * things to happen if the library and/or application ever change.
+ */
+
+/* Any features you will not be using can be undef'ed here */
+
+/* GR-P, 0.96a: Set "*TRANSFORMS_SUPPORTED" as default but allow user
+ * to turn it off with "*TRANSFORMS_NOT_SUPPORTED" or *PNG_NO_*_TRANSFORMS
+ * on the compile line, then pick and choose which ones to define without
+ * having to edit this file. It is safe to use the *TRANSFORMS_NOT_SUPPORTED
+ * if you only want to have a png-compliant reader/writer but don't need
+ * any of the extra transformations.  This saves about 80 kbytes in a
+ * typical installation of the library. (PNG_NO_* form added in version
+ * 1.0.1c, for consistency)
+ */
+
+/* The size of the png_text structure changed in libpng-1.0.6 when
+ * iTXt support was added.  iTXt support was turned off by default through
+ * libpng-1.2.x, to support old apps that malloc the png_text structure
+ * instead of calling png_set_text() and letting libpng malloc it.  It
+ * was turned on by default in libpng-1.3.0.
+ */
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+#  ifndef PNG_NO_iTXt_SUPPORTED
+#    define PNG_NO_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_iTXt
+#    define PNG_NO_READ_iTXt
+#  endif
+#  ifndef PNG_NO_WRITE_iTXt
+#    define PNG_NO_WRITE_iTXt
+#  endif
+#endif
+
+#if !defined(PNG_NO_iTXt_SUPPORTED)
+#  if !defined(PNG_READ_iTXt_SUPPORTED) && !defined(PNG_NO_READ_iTXt)
+#    define PNG_READ_iTXt
+#  endif
+#  if !defined(PNG_WRITE_iTXt_SUPPORTED) && !defined(PNG_NO_WRITE_iTXt)
+#    define PNG_WRITE_iTXt
+#  endif
+#endif
+
+/* The following support, added after version 1.0.0, can be turned off here en
+ * masse by defining PNG_LEGACY_SUPPORTED in case you need binary compatibility
+ * with old applications that require the length of png_struct and png_info
+ * to remain unchanged.
+ */
+
+#ifdef PNG_LEGACY_SUPPORTED
+#  define PNG_NO_FREE_ME
+#  define PNG_NO_READ_UNKNOWN_CHUNKS
+#  define PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_NO_READ_USER_CHUNKS
+#  define PNG_NO_READ_iCCP
+#  define PNG_NO_WRITE_iCCP
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_READ_sCAL
+#  define PNG_NO_WRITE_sCAL
+#  define PNG_NO_READ_sPLT
+#  define PNG_NO_WRITE_sPLT
+#  define PNG_NO_INFO_IMAGE
+#  define PNG_NO_READ_RGB_TO_GRAY
+#  define PNG_NO_READ_USER_TRANSFORM
+#  define PNG_NO_WRITE_USER_TRANSFORM
+#  define PNG_NO_USER_MEM
+#  define PNG_NO_READ_EMPTY_PLTE
+#  define PNG_NO_MNG_FEATURES
+#  define PNG_NO_FIXED_POINT_SUPPORTED
+#endif
+
+/* Ignore attempt to turn off both floating and fixed point support */
+#if !defined(PNG_FLOATING_POINT_SUPPORTED) || \
+    !defined(PNG_NO_FIXED_POINT_SUPPORTED)
+#  define PNG_FIXED_POINT_SUPPORTED
+#endif
+
+#ifndef PNG_NO_FREE_ME
+#  define PNG_FREE_ME_SUPPORTED
+#endif
+
+#if defined(PNG_READ_SUPPORTED)
+
+#if !defined(PNG_READ_TRANSFORMS_NOT_SUPPORTED) && \
+      !defined(PNG_NO_READ_TRANSFORMS)
+#  define PNG_READ_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_READ_EXPAND
+#    define PNG_READ_EXPAND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SHIFT
+#    define PNG_READ_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACK
+#    define PNG_READ_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_BGR
+#    define PNG_READ_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP
+#    define PNG_READ_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACKSWAP
+#    define PNG_READ_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT
+#    define PNG_READ_INVERT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_DITHER
+#    define PNG_READ_DITHER_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_BACKGROUND
+#    define PNG_READ_BACKGROUND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_16_TO_8
+#    define PNG_READ_16_TO_8_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_FILLER
+#    define PNG_READ_FILLER_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GAMMA
+#    define PNG_READ_GAMMA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GRAY_TO_RGB
+#    define PNG_READ_GRAY_TO_RGB_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP_ALPHA
+#    define PNG_READ_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT_ALPHA
+#    define PNG_READ_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_STRIP_ALPHA
+#    define PNG_READ_STRIP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_USER_TRANSFORM
+#    define PNG_READ_USER_TRANSFORM_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_RGB_TO_GRAY
+#    define PNG_READ_RGB_TO_GRAY_SUPPORTED
+#  endif
+#endif /* PNG_READ_TRANSFORMS_SUPPORTED */
+
+#if !defined(PNG_NO_PROGRESSIVE_READ) && \
+ !defined(PNG_PROGRESSIVE_READ_SUPPORTED) /* if you don't do progressive   */
+#  define PNG_PROGRESSIVE_READ_SUPPORTED  /* reading.  This is not talking */
+#endif                            /* about interlacing capability!  You'll */
+           /* still have interlacing unless you change the following line: */
+
+#define PNG_READ_INTERLACING_SUPPORTED /* required in PNG-compliant decoders */
+
+#ifndef PNG_NO_READ_COMPOSITE_NODIV
+#  ifndef PNG_NO_READ_COMPOSITED_NODIV  /* libpng-1.0.x misspelling */
+#    define PNG_READ_COMPOSITE_NODIV_SUPPORTED  /* well tested on Intel, SGI */
+#  endif
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated, will be removed from version 2.0.0.
+   Use PNG_MNG_FEATURES_SUPPORTED instead. */
+#ifndef PNG_NO_READ_EMPTY_PLTE
+#  define PNG_READ_EMPTY_PLTE_SUPPORTED
+#endif
+#endif
+
+#endif /* PNG_READ_SUPPORTED */
+
+#if defined(PNG_WRITE_SUPPORTED)
+
+# if !defined(PNG_WRITE_TRANSFORMS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_TRANSFORMS)
+#  define PNG_WRITE_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_WRITE_SHIFT
+#    define PNG_WRITE_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACK
+#    define PNG_WRITE_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_BGR
+#    define PNG_WRITE_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP
+#    define PNG_WRITE_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACKSWAP
+#    define PNG_WRITE_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT
+#    define PNG_WRITE_INVERT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_FILLER
+#    define PNG_WRITE_FILLER_SUPPORTED   /* same as WRITE_STRIP_ALPHA */
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP_ALPHA
+#    define PNG_WRITE_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT_ALPHA
+#    define PNG_WRITE_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_USER_TRANSFORM
+#    define PNG_WRITE_USER_TRANSFORM_SUPPORTED
+#  endif
+#endif /* PNG_WRITE_TRANSFORMS_SUPPORTED */
+
+#if !defined(PNG_NO_WRITE_INTERLACING_SUPPORTED) && \
+    !defined(PNG_WRITE_INTERLACING_SUPPORTED)
+#define PNG_WRITE_INTERLACING_SUPPORTED  /* not required for PNG-compliant
+                                            encoders, but can cause trouble
+                                            if left undefined */
+#endif
+
+#if !defined(PNG_NO_WRITE_WEIGHTED_FILTER) && \
+    !defined(PNG_WRITE_WEIGHTED_FILTER) && \
+    !defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+#  define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* skipped when the user predefined it */
+#endif /* weighted filtering is only enabled with floating-point support */
+
+#ifndef PNG_NO_WRITE_FLUSH
+#  define PNG_WRITE_FLUSH_SUPPORTED
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated, see PNG_MNG_FEATURES_SUPPORTED, above */
+#ifndef PNG_NO_WRITE_EMPTY_PLTE
+#  define PNG_WRITE_EMPTY_PLTE_SUPPORTED
+#endif
+#endif
+
+#endif /* PNG_WRITE_SUPPORTED */
+
+#ifndef PNG_1_0_X
+#  ifndef PNG_NO_ERROR_NUMBERS
+#    define PNG_ERROR_NUMBERS_SUPPORTED
+#  endif
+#endif /* PNG_1_0_X */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+#  ifndef PNG_NO_USER_TRANSFORM_PTR
+#    define PNG_USER_TRANSFORM_PTR_SUPPORTED
+#  endif
+#endif
+
+#ifndef PNG_NO_STDIO
+#  define PNG_TIME_RFC1123_SUPPORTED
+#endif
+
+/* This adds extra functions in pngget.c for accessing data from the
+ * info pointer (added in version 0.99)
+ * png_get_image_width()
+ * png_get_image_height()
+ * png_get_bit_depth()
+ * png_get_color_type()
+ * png_get_compression_type()
+ * png_get_filter_type()
+ * png_get_interlace_type()
+ * png_get_pixel_aspect_ratio()
+ * png_get_pixels_per_meter()
+ * png_get_x_offset_pixels()
+ * png_get_y_offset_pixels()
+ * png_get_x_offset_microns()
+ * png_get_y_offset_microns()
+ */
+#if !defined(PNG_NO_EASY_ACCESS) && !defined(PNG_EASY_ACCESS_SUPPORTED)
+#  define PNG_EASY_ACCESS_SUPPORTED
+#endif
+
+/* PNG_ASSEMBLER_CODE was enabled by default in version 1.2.0 
+ * and removed from version 1.2.20.  The following will be removed
+ * from libpng-1.4.0
+*/
+
+#if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_OPTIMIZED_CODE)
+#  ifndef PNG_OPTIMIZED_CODE_SUPPORTED
+#    define PNG_OPTIMIZED_CODE_SUPPORTED
+#  endif
+#endif
+
+#if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_ASSEMBLER_CODE)
+#  ifndef PNG_ASSEMBLER_CODE_SUPPORTED
+#    define PNG_ASSEMBLER_CODE_SUPPORTED
+#  endif
+
+#  if defined(__GNUC__) && defined(__x86_64__) && (__GNUC__ < 4)
+     /* work around 64-bit gcc compiler bugs in gcc-3.x */
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if defined(__APPLE__) /* MMX is off by default here unless explicitly requested */
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if (defined(__MWERKS__) && ((__MWERKS__ < 0x0900) || macintosh))
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#    define PNG_MMX_CODE_SUPPORTED /* default: on, unless an opt-out above fired */
+#  endif
+
+#endif /* PNG_READ_SUPPORTED && !PNG_NO_ASSEMBLER_CODE */
+/* end of obsolete code to be removed from libpng-1.4.0 */
+
+#if !defined(PNG_1_0_X)
+#if !defined(PNG_NO_USER_MEM) && !defined(PNG_USER_MEM_SUPPORTED)
+#  define PNG_USER_MEM_SUPPORTED
+#endif
+#endif /* PNG_1_0_X */
+
+/* Added at libpng-1.2.6 */
+#if !defined(PNG_1_0_X)
+#ifndef PNG_SET_USER_LIMITS_SUPPORTED
+#if !defined(PNG_NO_SET_USER_LIMITS) && !defined(PNG_SET_USER_LIMITS_SUPPORTED)
+#  define PNG_SET_USER_LIMITS_SUPPORTED
+#endif
+#endif
+#endif /* PNG_1_0_X */
+
+/* Added at libpng-1.0.16 and 1.2.6.  To accept all valid PNGS no matter
+ * how large, set these limits to 0x7fffffffL
+ */
+#ifndef PNG_USER_WIDTH_MAX
+#  define PNG_USER_WIDTH_MAX 1000000L
+#endif
+#ifndef PNG_USER_HEIGHT_MAX
+#  define PNG_USER_HEIGHT_MAX 1000000L
+#endif
+
+/* These are currently experimental features, define them if you want */
+
+/* very little testing */
+/*
+#ifdef PNG_READ_SUPPORTED
+#  ifndef PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#    define PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#  endif
+#endif
+*/
+
+/* This is only for PowerPC big-endian and 680x0 systems */
+/* some testing */
+/*
+#ifndef PNG_READ_BIG_ENDIAN_SUPPORTED
+#  define PNG_READ_BIG_ENDIAN_SUPPORTED
+#endif
+*/
+
+/* Buggy compilers (e.g., gcc 2.7.2.2) need this */
+/*
+#define PNG_NO_POINTER_INDEXING
+*/
+
+/* These functions are turned off by default, as they will be phased out. */
+/*
+#define  PNG_USELESS_TESTS_SUPPORTED
+#define  PNG_CORRECT_PALETTE_SUPPORTED
+*/
+
+/* Any chunks you are not interested in, you can undef here.  The
+ * ones that allocate memory may be especially important (hIST,
+ * tEXt, zTXt, tRNS, pCAL).  Others will just save time and make png_info
+ * a bit smaller.
+ */
+
+#if defined(PNG_READ_SUPPORTED) && \
+    !defined(PNG_READ_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_READ_ANCILLARY_CHUNKS)
+#  define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+#if defined(PNG_WRITE_SUPPORTED) && \
+    !defined(PNG_WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_ANCILLARY_CHUNKS)
+#  define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_READ_TEXT
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_READ_tEXt
+#  define PNG_NO_READ_zTXt
+#endif
+#ifndef PNG_NO_READ_bKGD
+#  define PNG_READ_bKGD_SUPPORTED
+#  define PNG_bKGD_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_cHRM
+#  define PNG_READ_cHRM_SUPPORTED
+#  define PNG_cHRM_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_gAMA
+#  define PNG_READ_gAMA_SUPPORTED
+#  define PNG_gAMA_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_hIST
+#  define PNG_READ_hIST_SUPPORTED
+#  define PNG_hIST_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iCCP
+#  define PNG_READ_iCCP_SUPPORTED
+#  define PNG_iCCP_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iTXt
+#  ifndef PNG_READ_iTXt_SUPPORTED
+#    define PNG_READ_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_READ_oFFs
+#  define PNG_READ_oFFs_SUPPORTED
+#  define PNG_oFFs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pCAL
+#  define PNG_READ_pCAL_SUPPORTED
+#  define PNG_pCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sCAL
+#  define PNG_READ_sCAL_SUPPORTED
+#  define PNG_sCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pHYs
+#  define PNG_READ_pHYs_SUPPORTED
+#  define PNG_pHYs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sBIT
+#  define PNG_READ_sBIT_SUPPORTED
+#  define PNG_sBIT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sPLT
+#  define PNG_READ_sPLT_SUPPORTED
+#  define PNG_sPLT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sRGB
+#  define PNG_READ_sRGB_SUPPORTED
+#  define PNG_sRGB_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tEXt
+#  define PNG_READ_tEXt_SUPPORTED
+#  define PNG_tEXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tIME
+#  define PNG_READ_tIME_SUPPORTED
+#  define PNG_tIME_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tRNS
+#  define PNG_READ_tRNS_SUPPORTED
+#  define PNG_tRNS_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_zTXt
+#  define PNG_READ_zTXt_SUPPORTED
+#  define PNG_zTXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_UNKNOWN_CHUNKS
+#  define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#    define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#  endif
+#endif
+#if !defined(PNG_NO_READ_USER_CHUNKS) && \
+     defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+#  define PNG_READ_USER_CHUNKS_SUPPORTED
+#  define PNG_USER_CHUNKS_SUPPORTED
+#  ifdef PNG_NO_READ_UNKNOWN_CHUNKS
+#    undef PNG_NO_READ_UNKNOWN_CHUNKS
+#  endif
+#  ifdef PNG_NO_HANDLE_AS_UNKNOWN
+#    undef PNG_NO_HANDLE_AS_UNKNOWN
+#  endif
+#endif
+#ifndef PNG_NO_READ_OPT_PLTE
+#  define PNG_READ_OPT_PLTE_SUPPORTED /* only affects support of the */
+#endif                      /* optional PLTE chunk in RGB and RGBA images */
+#if defined(PNG_READ_iTXt_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) || \
+    defined(PNG_READ_zTXt_SUPPORTED)
+#  define PNG_READ_TEXT_SUPPORTED
+#  define PNG_TEXT_SUPPORTED
+#endif
+
+#endif /* PNG_READ_ANCILLARY_CHUNKS_SUPPORTED */
+
+#ifdef PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_WRITE_TEXT
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_WRITE_tEXt
+#  define PNG_NO_WRITE_zTXt
+#endif
+#ifndef PNG_NO_WRITE_bKGD
+#  define PNG_WRITE_bKGD_SUPPORTED
+#  ifndef PNG_bKGD_SUPPORTED
+#    define PNG_bKGD_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_cHRM
+#  define PNG_WRITE_cHRM_SUPPORTED
+#  ifndef PNG_cHRM_SUPPORTED
+#    define PNG_cHRM_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_gAMA
+#  define PNG_WRITE_gAMA_SUPPORTED
+#  ifndef PNG_gAMA_SUPPORTED
+#    define PNG_gAMA_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_hIST
+#  define PNG_WRITE_hIST_SUPPORTED
+#  ifndef PNG_hIST_SUPPORTED
+#    define PNG_hIST_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iCCP
+#  define PNG_WRITE_iCCP_SUPPORTED
+#  ifndef PNG_iCCP_SUPPORTED
+#    define PNG_iCCP_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iTXt
+#  ifndef PNG_WRITE_iTXt_SUPPORTED
+#    define PNG_WRITE_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_oFFs
+#  define PNG_WRITE_oFFs_SUPPORTED
+#  ifndef PNG_oFFs_SUPPORTED
+#    define PNG_oFFs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pCAL
+#  define PNG_WRITE_pCAL_SUPPORTED
+#  ifndef PNG_pCAL_SUPPORTED
+#    define PNG_pCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sCAL
+#  define PNG_WRITE_sCAL_SUPPORTED
+#  ifndef PNG_sCAL_SUPPORTED
+#    define PNG_sCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pHYs
+#  define PNG_WRITE_pHYs_SUPPORTED
+#  ifndef PNG_pHYs_SUPPORTED
+#    define PNG_pHYs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sBIT
+#  define PNG_WRITE_sBIT_SUPPORTED
+#  ifndef PNG_sBIT_SUPPORTED
+#    define PNG_sBIT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sPLT
+#  define PNG_WRITE_sPLT_SUPPORTED
+#  ifndef PNG_sPLT_SUPPORTED
+#    define PNG_sPLT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sRGB
+#  define PNG_WRITE_sRGB_SUPPORTED
+#  ifndef PNG_sRGB_SUPPORTED
+#    define PNG_sRGB_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tEXt
+#  define PNG_WRITE_tEXt_SUPPORTED
+#  ifndef PNG_tEXt_SUPPORTED
+#    define PNG_tEXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tIME
+#  define PNG_WRITE_tIME_SUPPORTED
+#  ifndef PNG_tIME_SUPPORTED
+#    define PNG_tIME_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tRNS
+#  define PNG_WRITE_tRNS_SUPPORTED
+#  ifndef PNG_tRNS_SUPPORTED
+#    define PNG_tRNS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_zTXt
+#  define PNG_WRITE_zTXt_SUPPORTED
+#  ifndef PNG_zTXt_SUPPORTED
+#    define PNG_zTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#     ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#       define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#     endif
+#  endif
+#endif
+#if defined(PNG_WRITE_iTXt_SUPPORTED) || defined(PNG_WRITE_tEXt_SUPPORTED) || \
+    defined(PNG_WRITE_zTXt_SUPPORTED)
+#  define PNG_WRITE_TEXT_SUPPORTED
+#  ifndef PNG_TEXT_SUPPORTED
+#    define PNG_TEXT_SUPPORTED
+#  endif
+#endif
+
+#endif /* PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED */
+
+/* Turn this off to disable png_read_png() and
+ * png_write_png() and leave the row_pointers member
+ * out of the info structure.
+ */
+#ifndef PNG_NO_INFO_IMAGE
+#  define PNG_INFO_IMAGE_SUPPORTED
+#endif
+
+/* need the time information for reading tIME chunks */
+#if defined(PNG_tIME_SUPPORTED)
+#  if !defined(_WIN32_WCE)
+     /* "time.h" functions are not supported on WindowsCE */
+#    include <time.h>
+#  endif
+#endif
+
+/* Some typedefs to get us started.  These should be safe on most of the
+ * common platforms.  The typedefs should be at least as large as the
+ * numbers suggest (a png_uint_32 must be at least 32 bits long), but they
+ * don't have to be exactly that size.  Some compilers dislike passing
+ * unsigned shorts as function parameters, so you may be better off using
+ * unsigned int for png_uint_16.  Likewise, for 64-bit systems, you may
+ * want to have unsigned int for png_uint_32 instead of unsigned long.
+ */
+
+typedef unsigned long png_uint_32;
+typedef long png_int_32;
+typedef unsigned short png_uint_16;
+typedef short png_int_16;
+typedef unsigned char png_byte;
+
+/* This is usually size_t.  It is typedef'ed just in case you need it to
+   change (I'm not sure if you will or not, so I thought I'd be safe) */
+#ifdef PNG_SIZE_T
+   typedef PNG_SIZE_T png_size_t;
+#  define png_sizeof(x) png_convert_size(sizeof (x))
+#else
+   typedef size_t png_size_t;
+#  define png_sizeof(x) sizeof (x)
+#endif
+
+/* The following is needed for medium model support.  It cannot be in the
+ * PNG_INTERNAL section.  Needs modification for other compilers besides
+ * MSC.  Model independent support declares all arrays and pointers to be
+ * large using the far keyword.  The zlib version used must also support
+ * model independent data.  As of version zlib 1.0.4, the necessary changes
+ * have been made in zlib.  The USE_FAR_KEYWORD define triggers other
+ * changes that are needed. (Tim Wegner)
+ */
+
+/* Separate compiler dependencies (problem here is that zlib.h always
+   defines FAR). (SJT) */
+#ifdef __BORLANDC__
+#  if defined(__LARGE__) || defined(__HUGE__) || defined(__COMPACT__)
+#    define LDATA 1
+#  else
+#    define LDATA 0
+#  endif
+   /* GRR:  why is Cygwin in here?  Cygwin is not Borland C... */
+#  if !defined(__WIN32__) && !defined(__FLAT__) && !defined(__CYGWIN__)
+#    define PNG_MAX_MALLOC_64K
+#    if (LDATA != 1)
+#      ifndef FAR
+#        define FAR __far
+#      endif
+#      define USE_FAR_KEYWORD
+#    endif   /* LDATA != 1 */
+     /* Possibly useful for moving data out of default segment.
+      * Uncomment it if you want. Could also define FARDATA as
+      * const if your compiler supports it. (SJT)
+#    define FARDATA FAR
+      */
+#  endif  /* __WIN32__, __FLAT__, __CYGWIN__ */
+#endif   /* __BORLANDC__ */
+
+
+/* Suggest testing for specific compiler first before testing for
+ * FAR.  The Watcom compiler defines both __MEDIUM__ and M_I86MM,
+ * making reliance on certain keywords suspect. (SJT)
+ */
+
+/* MSC Medium model */
+#if defined(FAR)
+#  if defined(M_I86MM)
+#    define USE_FAR_KEYWORD
+#    define FARDATA FAR
+#    include <dos.h>
+#  endif
+#endif
+
+/* SJT: default case */
+#ifndef FAR
+#  define FAR
+#endif
+
+/* At this point FAR is always defined */
+#ifndef FARDATA
+#  define FARDATA
+#endif
+
+/* Typedef for floating-point numbers that are converted
+   to fixed-point with a multiple of 100,000, e.g., int_gamma */
+typedef png_int_32 png_fixed_point;
+
+/* Add typedefs for pointers */
+typedef void            FAR * png_voidp;
+typedef png_byte        FAR * png_bytep;
+typedef png_uint_32     FAR * png_uint_32p;
+typedef png_int_32      FAR * png_int_32p;
+typedef png_uint_16     FAR * png_uint_16p;
+typedef png_int_16      FAR * png_int_16p;
+typedef PNG_CONST char  FAR * png_const_charp;
+typedef char            FAR * png_charp;
+typedef png_fixed_point FAR * png_fixed_point_p;
+
+#ifndef PNG_NO_STDIO
+#if defined(_WIN32_WCE)
+typedef HANDLE                png_FILE_p;
+#else
+typedef FILE                * png_FILE_p;
+#endif
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * png_doublep;
+#endif
+
+/* Pointers to pointers; i.e. arrays */
+typedef png_byte        FAR * FAR * png_bytepp;
+typedef png_uint_32     FAR * FAR * png_uint_32pp;
+typedef png_int_32      FAR * FAR * png_int_32pp;
+typedef png_uint_16     FAR * FAR * png_uint_16pp;
+typedef png_int_16      FAR * FAR * png_int_16pp;
+typedef PNG_CONST char  FAR * FAR * png_const_charpp;
+typedef char            FAR * FAR * png_charpp;
+typedef png_fixed_point FAR * FAR * png_fixed_point_pp;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * FAR * png_doublepp;
+#endif
+
+/* Pointers to pointers to pointers; i.e., pointer to array */
+typedef char            FAR * FAR * FAR * png_charppp;
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* SPC -  Is this stuff deprecated? */
+/* It'll be removed as of libpng-1.3.0 - GR-P */
+/* libpng typedefs for types in zlib. If zlib changes
+ * or another compression library is used, then change these.
+ * Eliminates need to change all the source files.
+ */
+typedef charf *         png_zcharp;
+typedef charf * FAR *   png_zcharpp;
+typedef z_stream FAR *  png_zstreamp;
+#endif /* (PNG_1_0_X) || defined(PNG_1_2_X) */
+
+/*
+ * Define PNG_BUILD_DLL if the module being built is a Windows
+ * LIBPNG DLL.
+ *
+ * Define PNG_USE_DLL if you want to *link* to the Windows LIBPNG DLL.
+ * It is equivalent to Microsoft predefined macro _DLL that is
+ * automatically defined when you compile using the shared
+ * version of the CRT (C Run-Time library)
+ *
+ * The cygwin mods make this behavior a little different:
+ * Define PNG_BUILD_DLL if you are building a dll for use with cygwin
+ * Define PNG_STATIC if you are building a static library for use with cygwin,
+ *   -or- if you are building an application that you want to link to the
+ *   static library.
+ * PNG_USE_DLL is defined by default (no user action needed) unless one of
+ *   the other flags is defined.
+ */
+
+#if !defined(PNG_DLL) && (defined(PNG_BUILD_DLL) || defined(PNG_USE_DLL))
+#  define PNG_DLL
+#endif
+/* If CYGWIN, then disallow GLOBAL ARRAYS unless building a static lib.
+ * When building a static lib, default to no GLOBAL ARRAYS, but allow
+ * command-line override
+ */
+#if defined(__CYGWIN__)
+#  if !defined(PNG_STATIC)
+#    if defined(PNG_USE_GLOBAL_ARRAYS)
+#      undef PNG_USE_GLOBAL_ARRAYS
+#    endif
+#    if !defined(PNG_USE_LOCAL_ARRAYS)
+#      define PNG_USE_LOCAL_ARRAYS
+#    endif
+#  else
+#    if defined(PNG_USE_LOCAL_ARRAYS) || defined(PNG_NO_GLOBAL_ARRAYS)
+#      if defined(PNG_USE_GLOBAL_ARRAYS)
+#        undef PNG_USE_GLOBAL_ARRAYS
+#      endif
+#    endif
+#  endif
+#  if !defined(PNG_USE_LOCAL_ARRAYS) && !defined(PNG_USE_GLOBAL_ARRAYS)
+#    define PNG_USE_LOCAL_ARRAYS
+#  endif
+#endif
+
+/* Do not use global arrays (helps with building DLL's)
+ * They are no longer used in libpng itself, since version 1.0.5c,
+ * but might be required for some pre-1.0.5c applications.
+ */
+#if !defined(PNG_USE_LOCAL_ARRAYS) && !defined(PNG_USE_GLOBAL_ARRAYS)
+#  if defined(PNG_NO_GLOBAL_ARRAYS) || \
+      (defined(__GNUC__) && defined(PNG_DLL)) || defined(_MSC_VER)
+#    define PNG_USE_LOCAL_ARRAYS
+#  else
+#    define PNG_USE_GLOBAL_ARRAYS
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  undef PNGAPI
+#  define PNGAPI __cdecl
+#  undef PNG_IMPEXP
+#  define PNG_IMPEXP
+#endif  
+
+/* If you define PNGAPI, e.g., with compiler option "-DPNGAPI=__stdcall",
+ * you may get warnings regarding the linkage of png_zalloc and png_zfree.
+ * Don't ignore those warnings; you must also reset the default calling
+ * convention in your compiler to match your PNGAPI, and you must build
+ * zlib and your applications the same way you build libpng.
+ */
+
+#if defined(__MINGW32__) && !defined(PNG_MODULEDEF)
+#  ifndef PNG_NO_MODULEDEF
+#    define PNG_NO_MODULEDEF
+#  endif
+#endif
+
+#if !defined(PNG_IMPEXP) && defined(PNG_BUILD_DLL) && !defined(PNG_NO_MODULEDEF)
+#  define PNG_IMPEXP
+#endif
+
+#if defined(PNG_DLL) || defined(_DLL) || defined(__DLL__ ) || \
+    (( defined(_Windows) || defined(_WINDOWS) || \
+       defined(WIN32) || defined(_WIN32) || defined(__WIN32__) ))
+
+#  ifndef PNGAPI
+#     if defined(__GNUC__) || (defined (_MSC_VER) && (_MSC_VER >= 800))
+#        define PNGAPI __cdecl
+#     else
+#        define PNGAPI _cdecl
+#     endif
+#  endif
+
+#  if !defined(PNG_IMPEXP) && (!defined(PNG_DLL) || \
+       0 /* WINCOMPILER_WITH_NO_SUPPORT_FOR_DECLIMPEXP */)
+#     define PNG_IMPEXP
+#  endif
+
+#  if !defined(PNG_IMPEXP)
+
+#     define PNG_EXPORT_TYPE1(type,symbol)  PNG_IMPEXP type PNGAPI symbol
+#     define PNG_EXPORT_TYPE2(type,symbol)  type PNG_IMPEXP PNGAPI symbol
+
+      /* Borland/Microsoft */
+#     if defined(_MSC_VER) || defined(__BORLANDC__)
+#        if (_MSC_VER >= 800) || (__BORLANDC__ >= 0x500)
+#           define PNG_EXPORT PNG_EXPORT_TYPE1
+#        else
+#           define PNG_EXPORT PNG_EXPORT_TYPE2
+#           if defined(PNG_BUILD_DLL)
+#              define PNG_IMPEXP __export
+#           else
+#              define PNG_IMPEXP /*__import */ /* doesn't exist AFAIK in
+                                                 VC++ */
+#           endif                             /* Exists in Borland C++ for
+                                                 C++ classes (== huge) */
+#        endif
+#     endif
+
+#     if !defined(PNG_IMPEXP)
+#        if defined(PNG_BUILD_DLL)
+#           define PNG_IMPEXP __declspec(dllexport)
+#        else
+#           define PNG_IMPEXP __declspec(dllimport)
+#        endif
+#     endif
+#  endif  /* PNG_IMPEXP */
+#else /* !(DLL || non-cygwin WINDOWS) */
+#   if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
+#      ifndef PNGAPI
+#         define PNGAPI _System
+#      endif
+#   else
+#      if 0 /* ... other platforms, with other meanings */
+#      endif
+#   endif
+#endif
+
+#ifndef PNGAPI
+#  define PNGAPI
+#endif
+#ifndef PNG_IMPEXP
+#  define PNG_IMPEXP
+#endif
+
+#ifdef PNG_BUILDSYMS
+#  ifndef PNG_EXPORT
+#    define PNG_EXPORT(type,symbol) PNG_FUNCTION_EXPORT symbol END
+#  endif
+#  ifdef PNG_USE_GLOBAL_ARRAYS
+#    ifndef PNG_EXPORT_VAR
+#      define PNG_EXPORT_VAR(type) PNG_DATA_EXPORT
+#    endif
+#  endif
+#endif
+
+#ifndef PNG_EXPORT
+#  define PNG_EXPORT(type,symbol) PNG_IMPEXP type PNGAPI symbol
+#endif
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+#  ifndef PNG_EXPORT_VAR
+#    define PNG_EXPORT_VAR(type) extern PNG_IMPEXP type
+#  endif
+#endif
+
+/* User may want to use these so they are not in PNG_INTERNAL. Any library
+ * functions that are passed far data must be model independent.
+ */
+
+#ifndef PNG_ABORT
+#  define PNG_ABORT() abort()
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+#  define png_jmpbuf(png_ptr) ((png_ptr)->jmpbuf)
+#else
+#  define png_jmpbuf(png_ptr) \
+   (LIBPNG_WAS_COMPILED_WITH__PNG_SETJMP_NOT_SUPPORTED)
+#endif
+
+#if defined(USE_FAR_KEYWORD)  /* memory model independent fns */
+/* use this to make far-to-near assignments */
+#  define CHECK   1
+#  define NOCHECK 0
+#  define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK))
+#  define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK))
+#  define png_snprintf _fsnprintf   /* Added to v 1.2.19 */
+#  define png_strlen  _fstrlen
+#  define png_memcmp  _fmemcmp    /* SJT: added */
+#  define png_memcpy  _fmemcpy
+#  define png_memset  _fmemset
+#else /* use the usual functions */
+#  define CVT_PTR(ptr)         (ptr)
+#  define CVT_PTR_NOCHECK(ptr) (ptr)
+#  ifndef PNG_NO_SNPRINTF
+#    ifdef _MSC_VER
+#      define png_snprintf _snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 _snprintf
+#      define png_snprintf6 _snprintf
+#    else
+#      define png_snprintf snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 snprintf
+#      define png_snprintf6 snprintf
+#    endif
+#  else
+     /* You don't have or don't want to use snprintf().  Caution: Using
+      * sprintf instead of snprintf exposes your application to accidental
+      * or malevolent buffer overflows.  If you don't have snprintf()
+      * as a general rule you should provide one (you can get one from
+      * Portable OpenSSH). */
+#    define png_snprintf(s1,n,fmt,x1) sprintf(s1,fmt,x1)
+#    define png_snprintf2(s1,n,fmt,x1,x2) sprintf(s1,fmt,x1,x2)
+#    define png_snprintf6(s1,n,fmt,x1,x2,x3,x4,x5,x6) \
+        sprintf(s1,fmt,x1,x2,x3,x4,x5,x6)
+#  endif
+#  define png_strlen  strlen
+#  define png_memcmp  memcmp      /* SJT: added */
+#  define png_memcpy  memcpy
+#  define png_memset  memset
+#endif
+/* End of memory model independent support */
+
+/* Just a little check that someone hasn't tried to define something
+ * contradictory.
+ */
+#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K)
+#  undef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 65536L
+#endif
+
+/* Added at libpng-1.2.8 */
+#endif /* PNG_VERSION_INFO_ONLY */
+
+#endif /* PNGCONF_H */
diff --git a/PNG/pngerror.c b/PNG/pngerror.c
new file mode 100644
index 0000000..b364fc0
--- /dev/null
+++ b/PNG/pngerror.c
@@ -0,0 +1,343 @@
+
+/* pngerror.c - error and warning handling functions
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all error handling.  Users who
+ * need special error handling are expected to write replacement functions
+ * and use png_set_error_fn() to use those functions.  See the instructions
+ * at each function.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+static void /* PRIVATE */
+png_default_error PNGARG((png_structp png_ptr,
+  png_const_charp error_message));
+#ifndef PNG_NO_WARNINGS
+static void /* PRIVATE */
+png_default_warning PNGARG((png_structp png_ptr,
+  png_const_charp warning_message));
+#endif /* PNG_NO_WARNINGS */
+
+/* This function is called whenever there is a fatal error.  This function
+ * should not be changed.  If there is a need to handle errors differently,
+ * you should supply a replacement error function and use png_set_error_fn()
+ * to replace the error function at run-time.
+ */
+#ifndef PNG_NO_ERROR_TEXT
+void PNGAPI
+png_error(png_structp png_ptr, png_const_charp error_message)
+{
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   char msg[16]; /* receives the bare error number when the text is stripped */
+   if (png_ptr != NULL)
+   {
+     if (png_ptr->flags&
+       (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))
+     {
+       if (*error_message == '#') /* message starts with a "#<number> " prefix */
+       {
+           int offset;
+           for (offset=1; offset<15; offset++) /* look for the blank ending the number */
+              if (*(error_message+offset) == ' ')
+                  break;
+           if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT)
+           {
+              int i;
+              for (i=0; i<offset-1; i++) /* copy the characters between '#' and the blank */
+                 msg[i]=error_message[i+1];
+              msg[i]='\0';
+              error_message=msg; /* report the number only */
+           }
+           else
+              error_message+=offset; /* drop the number, keep the text */
+       }
+       else
+       {
+           if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT)
+           {
+              msg[0]='0'; /* unnumbered message: report "0" in place of the text */
+              msg[1]='\0';
+              error_message=msg;
+           }
+       }
+     }
+   }
+#endif
+   if (png_ptr != NULL && png_ptr->error_fn != NULL)
+      (*(png_ptr->error_fn))(png_ptr, error_message);
+
+   /* If the custom handler doesn't exist, or if it returns,
+      use the default handler, which will not return. */
+   png_default_error(png_ptr, error_message);
+}
+#else
+void PNGAPI
+png_err(png_structp png_ptr)
+{
+   /* Text-free variant of png_error: handlers get a null message pointer. */
+   if (png_ptr != NULL && png_ptr->error_fn != NULL)
+      png_ptr->error_fn(png_ptr, NULL);
+
+   /* Reached when no handler is installed or the handler came back. */
+   png_default_error(png_ptr, NULL);
+}
+#endif /* PNG_NO_ERROR_TEXT */
+
+#ifndef PNG_NO_WARNINGS
+/* This function is called whenever there is a non-fatal error.  A leading
+ * "#<number>" prefix is skipped (subject to the strip flags when error
+ * numbers are supported).  The message goes to the warning function set
+ * with png_set_error_fn(); if png_ptr is NULL or no warning function is
+ * installed, the default warning handler is used instead of dropping it. */
+void PNGAPI
+png_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+   int offset = 0;
+   if (png_ptr != NULL)
+   {
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (png_ptr->flags&
+     (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))
+#endif
+     {
+       if (*warning_message == '#')
+       {
+           for (offset=1; offset<15; offset++)
+              if (*(warning_message+offset) == ' ')
+                  break;
+       }
+     }
+   }
+   if (png_ptr != NULL && png_ptr->warning_fn != NULL)
+      (*(png_ptr->warning_fn))(png_ptr, warning_message+offset);
+   else
+      png_default_warning(png_ptr, warning_message+offset);
+}
+#endif /* PNG_NO_WARNINGS */
+
+
+/* These utilities are used internally to build an error message that relates
+ * to the current chunk.  The chunk name comes from png_ptr->chunk_name,
+ * this is used to prefix the message.  The message is limited in length
+ * to 63 bytes, the name characters are output as hex digits wrapped in []
+ * if the character is invalid.
+ */
+#define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
+static PNG_CONST char png_digit[16] = {
+   '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+   'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+#define PNG_MAX_ERROR_TEXT 64
+
+#if !defined(PNG_NO_WARNINGS) || !defined(PNG_NO_ERROR_TEXT)
+static void /* PRIVATE */
+png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp
+   error_message)
+{
+   int iout = 0, iin = 0;
+
+   while (iin < 4) /* chunk name: letters as-is, other bytes as "[XX]" hex */
+   {
+      int c = png_ptr->chunk_name[iin++];
+      if (isnonalpha(c))
+      {
+         buffer[iout++] = '[';
+         buffer[iout++] = png_digit[(c & 0xf0) >> 4];
+         buffer[iout++] = png_digit[c & 0x0f];
+         buffer[iout++] = ']';
+      }
+      else
+         buffer[iout++] = (png_byte)c;
+   }
+
+   if (error_message == NULL)
+      buffer[iout] = '\0';
+   else
+   {
+      buffer[iout++] = ':';
+      buffer[iout++] = ' ';
+      /* Stop at the terminator so a short message is never read past its end. */
+      for (iin = 0; iin < PNG_MAX_ERROR_TEXT-1 && error_message[iin] != '\0';)
+         buffer[iout++] = error_message[iin++];
+      buffer[iout] = '\0';
+   }
+}
+
+#ifdef PNG_READ_SUPPORTED
+void PNGAPI
+png_chunk_error(png_structp png_ptr, png_const_charp error_message)
+{
+   char chunk_msg[18+PNG_MAX_ERROR_TEXT]; /* 4 x "[XX]" + ": " + text */
+   png_const_charp report = error_message;
+   if (png_ptr != NULL)
+   {
+     /* Prefix the message with the name of the current chunk. */
+     png_format_buffer(png_ptr, chunk_msg, error_message);
+     report = chunk_msg;
+   }
+   png_error(png_ptr, report);
+}
+#endif /* PNG_READ_SUPPORTED */
+#endif /* !defined(PNG_NO_WARNINGS) || !defined(PNG_NO_ERROR_TEXT) */
+
+#ifndef PNG_NO_WARNINGS
+void PNGAPI
+png_chunk_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+   char chunk_msg[18+PNG_MAX_ERROR_TEXT]; /* 4 x "[XX]" + ": " + text */
+   png_const_charp report = warning_message;
+   if (png_ptr != NULL)
+   {
+     /* Prefix the message with the name of the current chunk. */
+     png_format_buffer(png_ptr, chunk_msg, warning_message);
+     report = chunk_msg;
+   }
+   png_warning(png_ptr, report);
+}
+#endif /* PNG_NO_WARNINGS */
+
+
+/* This is the default error handling function.  Note that replacements for
+ * this function MUST NOT RETURN, or the program will likely crash.  This
+ * function is used by default, or if the program supplies NULL for the
+ * error function pointer in png_set_error_fn().
+ */
+static void /* PRIVATE */
+png_default_error(png_structp png_ptr, png_const_charp error_message)
+{
+#ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (*error_message == '#') /* message may carry a "#<number> " prefix */
+   {
+     int offset;
+     char error_number[16];
+     for (offset=0; offset<15; offset++) /* copy what follows '#' up to a blank */
+     {
+         error_number[offset] = *(error_message+offset+1);
+         if (*(error_message+offset) == ' ')
+             break;
+     }
+     if((offset > 1) && (offset < 15)) /* a blank ended a non-empty number */
+     {
+       error_number[offset-1]='\0'; /* overwrite the copied blank */
+       fprintf(stderr, "libpng error no. %s: %s\n", error_number,
+          error_message+offset);
+     }
+     else
+       fprintf(stderr, "libpng error: %s, offset=%d\n", error_message,offset);
+   }
+   else
+#endif
+   fprintf(stderr, "libpng error: %s\n", error_message);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   if (png_ptr)
+   {
+#  ifdef USE_FAR_KEYWORD
+   {
+      jmp_buf jmpbuf;
+      png_memcpy(jmpbuf, png_ptr->jmpbuf, png_sizeof(jmp_buf));
+      longjmp(jmpbuf, 1);
+   }
+#  else
+   longjmp(png_ptr->jmpbuf, 1);
+#  endif
+   }
+#endif
+   /* Here if there is no setjmp support or png_ptr is NULL: never return. */
+   PNG_ABORT();
+#ifdef PNG_NO_CONSOLE_IO
+   error_message = error_message; /* make compiler happy */
+#endif
+}
+
+#ifndef PNG_NO_WARNINGS
+/* This function is called when there is a warning, but the library thinks
+ * it can continue anyway.  Replacement functions don't have to do anything
+ * here if you don't want them to.  In the default configuration, png_ptr is
+ * not used, but it is passed in case it may be useful.
+ */
+static void /* PRIVATE */
+png_default_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+#ifndef PNG_NO_CONSOLE_IO
+#  ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (*warning_message == '#') /* message may carry a "#<number> " prefix */
+   {
+     int offset;
+     char warning_number[16];
+     for (offset=0; offset<15; offset++) /* copy what follows '#' up to a blank */
+     {
+        warning_number[offset]=*(warning_message+offset+1);
+        if (*(warning_message+offset) == ' ')
+            break;
+     }
+     if((offset > 1) && (offset < 15)) /* a blank ended a non-empty number */
+     {
+       warning_number[offset-1]='\0'; /* overwrite the copied blank */
+       fprintf(stderr, "libpng warning no. %s: %s\n", warning_number,
+          warning_message+offset);
+     }
+     else /* no usable number: print the message unchanged */
+       fprintf(stderr, "libpng warning: %s\n", warning_message);
+   }
+   else
+#  endif
+     fprintf(stderr, "libpng warning: %s\n", warning_message);
+#else
+   warning_message = warning_message; /* make compiler happy */
+#endif
+   png_ptr = png_ptr; /* make compiler happy */
+}
+#endif /* PNG_NO_WARNINGS */
+
+/* This function is called when the application wants to use another method
+ * of handling errors and warnings.  Note that the error function MUST NOT
+ * return to the calling routine or serious problems will occur.  The return
+ * method used in the default routine calls longjmp(png_ptr->jmpbuf, 1)
+ */
+void PNGAPI
+png_set_error_fn(png_structp png_ptr, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warning_fn)
+{
+   /* Without a png_struct there is nowhere to store the handlers. */
+   if (png_ptr == NULL) return;
+   png_ptr->warning_fn = warning_fn;
+   png_ptr->error_fn = error_fn;
+   png_ptr->error_ptr = error_ptr;
+}
+
+
+/* This function returns a pointer to the error_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy and png_read_destroy are called.
+ */
+png_voidp PNGAPI
+png_get_error_ptr(png_structp png_ptr)
+{
+   /* The user pointer is stored in the png_struct, so a NULL png_ptr can
+    * only yield NULL. */
+   return (png_ptr != NULL ? (png_voidp)png_ptr->error_ptr : NULL);
+}
+
+
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+void PNGAPI
+png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode)
+{
+   if(png_ptr != NULL) /* a NULL png_ptr is ignored */
+   {
+     png_ptr->flags &= /* NOTE(review): &= keeps only bits present in the value below, so both strip flags and any flag absent from strip_mode are cleared -- confirm intended */
+       ((~(PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))&strip_mode);
+   }
+}
+#endif
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/PNG/pnggccrd.c b/PNG/pnggccrd.c
new file mode 100644
index 0000000..e61523e
--- /dev/null
+++ b/PNG/pnggccrd.c
@@ -0,0 +1,103 @@
+/* pnggccrd.c was removed from libpng-1.2.20. */
+
+/* This code snippet is for use by configure's compilation test. */
+
+#if (!defined _MSC_VER) && \
+    defined(PNG_ASSEMBLER_CODE_SUPPORTED) && \
+    defined(PNG_MMX_CODE_SUPPORTED)
+
+int PNGAPI png_dummy_mmx_support(void);
+
+static int _mmx_supported = 2; // 0: no MMX; 1: MMX supported; 2: not tested
+
+int PNGAPI
+png_dummy_mmx_support(void) __attribute__((noinline));
+
+int PNGAPI
+png_dummy_mmx_support(void)
+{
+   int result;
+#if defined(PNG_MMX_CODE_SUPPORTED)  // always true here: the enclosing #if already requires it
+    __asm__ __volatile__ (
+#if defined(__x86_64__)
+        "pushq %%rbx          \n\t"  // rbx gets clobbered by CPUID instruction
+        "pushq %%rcx          \n\t"  // so does rcx...
+        "pushq %%rdx          \n\t"  // ...and rdx (but rcx & rdx safe on Linux)
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack into rax
+        "movq %%rax, %%rcx    \n\t"  // make another copy of Eflag in rcx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushq %%rax          \n\t"  // save modified Eflag back to stack
+        "popfq                \n\t"  // restore modified value to Eflag reg
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack
+        "pushq %%rcx          \n\t"  // save original Eflag to stack
+        "popfq                \n\t"  // restore original Eflag
+#else
+        "pushl %%ebx          \n\t"  // ebx gets clobbered by CPUID instruction
+        "pushl %%ecx          \n\t"  // so does ecx...
+        "pushl %%edx          \n\t"  // ...and edx (but ecx & edx safe on Linux)
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack into eax
+        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushl %%eax          \n\t"  // save modified Eflag back to stack
+        "popfl                \n\t"  // restore modified value to Eflag reg
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack
+        "pushl %%ecx          \n\t"  // save original Eflag to stack
+        "popfl                \n\t"  // restore original Eflag
+#endif
+        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
+        "jz 0f                \n\t"  // if same, CPUID instr. is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero
+//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
+        "cpuid                \n\t"  // get the CPU identification info
+        "cmpl $1, %%eax       \n\t"  // make sure eax return non-zero value
+        "jl 0f                \n\t"  // if eax is zero, MMX is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
+        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
+                                     // faster than the instruction "mov eax, 1"
+        "cpuid                \n\t"  // get the CPU identification info again
+        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
+        "jz 0f                \n\t"  // non-zero = yes, MMX IS supported
+
+        "movl $1, %%eax       \n\t"  // set return value to 1
+        "jmp  1f              \n\t"  // DONE:  have MMX support
+
+    "0:                       \n\t"  // .NOT_SUPPORTED: target label for jump instructions
+        "movl $0, %%eax       \n\t"  // set return value to 0
+    "1:                       \n\t"  // .RETURN: target label for jump instructions
+#if defined(__x86_64__)
+        "popq %%rdx           \n\t"  // restore rdx
+        "popq %%rcx           \n\t"  // restore rcx
+        "popq %%rbx           \n\t"  // restore rbx
+#else
+        "popl %%edx           \n\t"  // restore edx
+        "popl %%ecx           \n\t"  // restore ecx
+        "popl %%ebx           \n\t"  // restore ebx
+#endif
+
+//      "ret                  \n\t"  // DONE:  no MMX support
+                                     // (fall through to standard C "ret")
+
+        : "=a" (result)              // output list: eax is delivered in result
+
+        :                            // any variables used on input (none)
+
+                                     // no clobber list
+//      , "%ebx", "%ecx", "%edx"     // GRR:  we handle these manually
+//      , "memory"   // if write to a variable gcc thought was in a reg
+//      , "cc"       // "condition codes" (flag bits)
+    );
+    _mmx_supported = result;         // cache the answer: 1 = MMX, 0 = no MMX
+#else                                // not reachable: see the enclosing #if
+    _mmx_supported = 0;
+#endif /* PNG_MMX_CODE_SUPPORTED */
+
+    return _mmx_supported;
+}
+#endif
diff --git a/PNG/pngget.c b/PNG/pngget.c
new file mode 100644
index 0000000..a0e90bb
--- /dev/null
+++ b/PNG/pngget.c
@@ -0,0 +1,901 @@
+
+/* pngget.c - retrieval of values from info struct
+ *
+ * Last changed in libpng 1.2.15 January 5, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+png_uint_32 PNGAPI
+png_get_valid(png_structp png_ptr, png_infop info_ptr, png_uint_32 flag)
+{
+   /* Bits of flag that are also set in info_ptr->valid; 0 on NULL input. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return(0);
+   return(info_ptr->valid & flag);
+}
+
+png_uint_32 PNGAPI
+png_get_rowbytes(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Stored rowbytes field of the info struct; 0 on NULL input. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return(0);
+   return(info_ptr->rowbytes);
+}
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+png_bytepp PNGAPI
+png_get_rows(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Stored row_pointers array of the info struct; null on NULL input. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return(0);
+   return(info_ptr->row_pointers);
+}
+#endif
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* easy access to info, added in libpng-0.99 */
+png_uint_32 PNGAPI
+png_get_image_width(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored width field; with a NULL png_ptr or info_ptr
+    * there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->width;
+}
+
+png_uint_32 PNGAPI
+png_get_image_height(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored height field; with a NULL png_ptr or info_ptr
+    * there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->height;
+}
+
+png_byte PNGAPI
+png_get_bit_depth(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored bit_depth field; with a NULL png_ptr or
+    * info_ptr there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->bit_depth;
+}
+
+png_byte PNGAPI
+png_get_color_type(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored color_type field; with a NULL png_ptr or
+    * info_ptr there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->color_type;
+}
+
+png_byte PNGAPI
+png_get_filter_type(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored filter_type field; with a NULL png_ptr or
+    * info_ptr there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->filter_type;
+}
+
+png_byte PNGAPI
+png_get_interlace_type(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored interlace_type field; with a NULL png_ptr or
+    * info_ptr there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->interlace_type;
+}
+
+png_byte PNGAPI
+png_get_compression_type(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Hand back the stored compression_type field; with a NULL png_ptr or
+    * info_ptr there is nothing to read, so report 0. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return info_ptr->compression_type;
+}
+
+png_uint_32 PNGAPI
+png_get_x_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   /* x_pixels_per_unit from the pHYs data, reported only when the stored
+    * unit is PNG_RESOLUTION_METER.  Yields 0 for NULL handles, when pHYs is
+    * not marked valid, for any other unit, or without pHYs support. */
+#if defined(PNG_pHYs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return (info_ptr->x_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+png_uint_32 PNGAPI
+png_get_y_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   /* y_pixels_per_unit from the pHYs data, reported only when the stored
+    * unit is PNG_RESOLUTION_METER.  Yields 0 for NULL handles, when pHYs is
+    * not marked valid, for any other unit, or without pHYs support. */
+#if defined(PNG_pHYs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return (info_ptr->y_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+png_uint_32 PNGAPI
+png_get_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Single resolution value: x_pixels_per_unit, reported only when the
+    * unit is PNG_RESOLUTION_METER and the x and y values are equal.
+    * Yields 0 otherwise, for NULL handles, or without pHYs support. */
+#if defined(PNG_pHYs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER &&
+          info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit)
+         return (info_ptr->x_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+float PNGAPI
+png_get_pixel_aspect_ratio(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Ratio y_pixels_per_unit / x_pixels_per_unit from the pHYs data.
+    * Yields 0.0 for NULL handles, when pHYs is not marked valid, when
+    * x_pixels_per_unit is 0 (avoiding a division by zero), or when pHYs
+    * support is compiled out. */
+#if defined(PNG_pHYs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_aspect_ratio");
+      if (info_ptr->x_pixels_per_unit != 0)
+         return ((float)((float)info_ptr->y_pixels_per_unit
+            /(float)info_ptr->x_pixels_per_unit));
+   }
+#endif
+   return ((float)0.0);
+}
+#endif
+
+png_int_32 PNGAPI
+png_get_x_offset_microns(png_structp png_ptr, png_infop info_ptr)
+{
+   /* x_offset from the oFFs data, reported only when the stored unit is
+    * PNG_OFFSET_MICROMETER.  Yields 0 for NULL handles, when oFFs is not
+    * marked valid, for any other unit, or without oFFs support. */
+#if defined(PNG_oFFs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_offset_microns");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return (info_ptr->x_offset);
+   }
+#endif
+   return (0);
+}
+
+png_int_32 PNGAPI
+png_get_y_offset_microns(png_structp png_ptr, png_infop info_ptr)
+{
+   /* y_offset from the oFFs data, reported only when the stored unit is
+    * PNG_OFFSET_MICROMETER.  Yields 0 for NULL handles, when oFFs is not
+    * marked valid, for any other unit, or without oFFs support. */
+#if defined(PNG_oFFs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_offset_microns");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return (info_ptr->y_offset);
+   }
+#endif
+   return (0);
+}
+
+png_int_32 PNGAPI
+png_get_x_offset_pixels(png_structp png_ptr, png_infop info_ptr)
+{
+   /* x_offset from the oFFs data, reported only when the stored unit is
+    * PNG_OFFSET_PIXEL.  Yields 0 for NULL handles, when oFFs is not marked
+    * valid, for any other unit, or without oFFs support. */
+#if defined(PNG_oFFs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_offset_pixels");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return (info_ptr->x_offset);
+   }
+#endif
+   return (0);
+}
+
+png_int_32 PNGAPI
+png_get_y_offset_pixels(png_structp png_ptr, png_infop info_ptr)
+{
+   /* y_offset from the oFFs data, reported only when the stored unit is
+    * PNG_OFFSET_PIXEL.  Yields 0 for NULL handles, when oFFs is not marked
+    * valid, for any other unit, or without oFFs support. */
+#if defined(PNG_oFFs_SUPPORTED)
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_offset_pixels");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return (info_ptr->y_offset);
+   }
+#endif
+   return (0);
+}
+
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
+png_uint_32 PNGAPI
+png_get_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{
+   float per_meter = (float)png_get_pixels_per_meter(png_ptr, info_ptr);
+   return ((png_uint_32)(per_meter *.0254 +.5)); /* +.5 rounds to nearest */
+}
+
+png_uint_32 PNGAPI
+png_get_x_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{
+   float per_meter = (float)png_get_x_pixels_per_meter(png_ptr, info_ptr);
+   return ((png_uint_32)(per_meter *.0254 +.5)); /* +.5 rounds to nearest */
+}
+
+png_uint_32 PNGAPI
+png_get_y_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{
+   float per_meter = (float)png_get_y_pixels_per_meter(png_ptr, info_ptr);
+   return ((png_uint_32)(per_meter *.0254 +.5)); /* +.5 rounds to nearest */
+}
+
+float PNGAPI
+png_get_x_offset_inches(png_structp png_ptr, png_infop info_ptr)
+{
+   float microns = (float)png_get_x_offset_microns(png_ptr, info_ptr);
+   return (float)(microns *.00003937); /* scale the micron offset to inches */
+}
+
+float PNGAPI
+png_get_y_offset_inches(png_structp png_ptr, png_infop info_ptr)
+{
+   float microns = (float)png_get_y_offset_microns(png_ptr, info_ptr);
+   return (float)(microns *.00003937); /* scale the micron offset to inches */
+}
+
+#if defined(PNG_pHYs_SUPPORTED)
+png_uint_32 PNGAPI
+png_get_pHYs_dpi(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 found = 0;
+   /* Nothing is written and 0 is returned unless pHYs is marked valid. */
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (info_ptr->valid & PNG_INFO_pHYs) == 0)
+      return (found);
+   png_debug1(1, "in %s retrieval function\n", "pHYs");
+   if (res_x != NULL)
+   {
+      *res_x = info_ptr->x_pixels_per_unit;
+      found |= PNG_INFO_pHYs;
+   }
+   if (res_y != NULL)
+   {
+      *res_y = info_ptr->y_pixels_per_unit;
+      found |= PNG_INFO_pHYs;
+   }
+   if (unit_type != NULL)
+   {
+      *unit_type = (int)info_ptr->phys_unit_type;
+      found |= PNG_INFO_pHYs;
+      if (*unit_type == 1) /* rescale the values just stored, rounding */
+      {
+         if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50);
+         if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50);
+      }
+   }
+   return (found);
+}
+#endif /* PNG_pHYs_SUPPORTED */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
+
+/* png_get_channels really belongs in here, too, but it's been around longer */
+
+#endif  /* PNG_EASY_ACCESS_SUPPORTED */
+
+png_byte PNGAPI
+png_get_channels(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Stored channels field of the info struct; 0 on NULL input. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return(info_ptr->channels);
+}
+
+png_bytep PNGAPI
+png_get_signature(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Pointer to the signature bytes kept in info_ptr; NULL on NULL input. */
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (NULL);
+   return(info_ptr->signature);
+}
+
+#if defined(PNG_bKGD_SUPPORTED)
+/* Point *background at the bKGD colour held inside info_ptr.  Returns
+ * PNG_INFO_bKGD, or 0 if the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_bKGD(png_structp png_ptr, png_infop info_ptr,
+   png_color_16p *background)
+{
+   if (png_ptr == NULL || info_ptr == NULL || background == NULL ||
+      !(info_ptr->valid & PNG_INFO_bKGD))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "bKGD");
+   *background = &(info_ptr->background);
+   return (PNG_INFO_bKGD);
+}
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Copy the floating-point cHRM chromaticities into whichever output
+ * pointers are non-NULL.  Returns PNG_INFO_cHRM if the chunk is valid in
+ * info_ptr, otherwise 0 (and nothing is written). */
+png_uint_32 PNGAPI
+png_get_cHRM(png_structp png_ptr, png_infop info_ptr,
+   double *white_x, double *white_y, double *red_x, double *red_y,
+   double *green_x, double *green_y, double *blue_x, double *blue_y)
+{
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_cHRM))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "cHRM");
+
+   /* White point */
+   if (white_x != NULL) *white_x = (double)info_ptr->x_white;
+   if (white_y != NULL) *white_y = (double)info_ptr->y_white;
+   /* Red */
+   if (red_x != NULL) *red_x = (double)info_ptr->x_red;
+   if (red_y != NULL) *red_y = (double)info_ptr->y_red;
+   /* Green */
+   if (green_x != NULL) *green_x = (double)info_ptr->x_green;
+   if (green_y != NULL) *green_y = (double)info_ptr->y_green;
+   /* Blue */
+   if (blue_x != NULL) *blue_x = (double)info_ptr->x_blue;
+   if (blue_y != NULL) *blue_y = (double)info_ptr->y_blue;
+   return (PNG_INFO_cHRM);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Copy the fixed-point cHRM chromaticities into whichever output pointers
+ * are non-NULL.  Returns PNG_INFO_cHRM if the chunk is valid in info_ptr,
+ * otherwise 0 (and nothing is written). */
+png_uint_32 PNGAPI
+png_get_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
+   png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
+   png_fixed_point *blue_x, png_fixed_point *blue_y)
+{
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_cHRM))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "cHRM");
+
+   /* White point */
+   if (white_x != NULL) *white_x = info_ptr->int_x_white;
+   if (white_y != NULL) *white_y = info_ptr->int_y_white;
+   /* Red */
+   if (red_x != NULL) *red_x = info_ptr->int_x_red;
+   if (red_y != NULL) *red_y = info_ptr->int_y_red;
+   /* Green */
+   if (green_x != NULL) *green_x = info_ptr->int_x_green;
+   if (green_y != NULL) *green_y = info_ptr->int_y_green;
+   /* Blue */
+   if (blue_x != NULL) *blue_x = info_ptr->int_x_blue;
+   if (blue_y != NULL) *blue_y = info_ptr->int_y_blue;
+   return (PNG_INFO_cHRM);
+}
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Store the file gamma as a double in *file_gamma.  Returns PNG_INFO_gAMA,
+ * or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_gAMA(png_structp png_ptr, png_infop info_ptr, double *file_gamma)
+{
+   if (png_ptr == NULL || info_ptr == NULL || file_gamma == NULL ||
+      !(info_ptr->valid & PNG_INFO_gAMA))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "gAMA");
+   *file_gamma = (double)info_ptr->gamma;
+   return (PNG_INFO_gAMA);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Store the fixed-point file gamma in *int_file_gamma.  Returns
+ * PNG_INFO_gAMA, or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_gAMA_fixed(png_structp png_ptr, png_infop info_ptr,
+    png_fixed_point *int_file_gamma)
+{
+   if (png_ptr == NULL || info_ptr == NULL || int_file_gamma == NULL ||
+      !(info_ptr->valid & PNG_INFO_gAMA))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "gAMA");
+   *int_file_gamma = info_ptr->int_gamma;
+   return (PNG_INFO_gAMA);
+}
+#endif
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+/* Store the sRGB rendering intent in *file_srgb_intent.  Returns
+ * PNG_INFO_sRGB, or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_sRGB(png_structp png_ptr, png_infop info_ptr, int *file_srgb_intent)
+{
+   if (png_ptr == NULL || info_ptr == NULL || file_srgb_intent == NULL ||
+      !(info_ptr->valid & PNG_INFO_sRGB))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "sRGB");
+   *file_srgb_intent = (int)info_ptr->srgb_intent;
+   return (PNG_INFO_sRGB);
+}
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+/* Retrieve the iCCP profile: *name / *profile receive pointers held by
+ * info_ptr, *proflen its length; compression_type is optional (may be NULL).
+ * Returns PNG_INFO_iCCP, or 0 if the chunk is invalid or a required pointer
+ * is NULL. */
+png_uint_32 PNGAPI
+png_get_iCCP(png_structp png_ptr, png_infop info_ptr,
+             png_charpp name, int *compression_type,
+             png_charpp profile, png_uint_32 *proflen)
+{
+   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL
+      || proflen == NULL || !(info_ptr->valid & PNG_INFO_iCCP))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "iCCP");
+   *name = info_ptr->iccp_name;
+   *profile = info_ptr->iccp_profile;
+   *proflen = (png_uint_32)info_ptr->iccp_proflen;
+   if (compression_type != NULL)   /* dummy output: never required */
+      *compression_type = (int)info_ptr->iccp_compression; return (PNG_INFO_iCCP);
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* Point *spalettes at the sPLT array held by info_ptr and return its entry
+ * count; returns 0 (writing nothing) if any argument is NULL. */
+png_uint_32 PNGAPI
+png_get_sPLT(png_structp png_ptr, png_infop info_ptr,
+             png_sPLT_tpp spalettes)
+{
+   if (png_ptr == NULL || info_ptr == NULL || spalettes == NULL)
+      return (0);
+   *spalettes = info_ptr->splt_palettes;
+   return ((png_uint_32)info_ptr->splt_palettes_num);
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* Point *hist at the histogram held by info_ptr.  Returns PNG_INFO_hIST,
+ * or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_16p *hist)
+{
+   if (png_ptr == NULL || info_ptr == NULL || hist == NULL ||
+      !(info_ptr->valid & PNG_INFO_hIST))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "hIST");
+   *hist = info_ptr->hist;
+   return (PNG_INFO_hIST);
+}
+#endif
+
+png_uint_32 PNGAPI
+png_get_IHDR(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *width, png_uint_32 *height, int *bit_depth,
+   int *color_type, int *interlace_type, int *compression_type,
+   int *filter_type)
+/* Copy IHDR fields out of info_ptr; returns 1, or 0 if a required arg is NULL */
+{
+   if (png_ptr != NULL && info_ptr != NULL && width != NULL && height != NULL &&
+      bit_depth != NULL && color_type != NULL) /* the required arguments */
+   {
+      png_debug1(1, "in %s retrieval function\n", "IHDR");
+      *width = info_ptr->width;
+      *height = info_ptr->height;
+      *bit_depth = info_ptr->bit_depth;
+      if (info_ptr->bit_depth < 1 || info_ptr->bit_depth > 16)
+        png_error(png_ptr, "Invalid bit depth");
+      *color_type = info_ptr->color_type;
+      if (info_ptr->color_type > 6)
+        png_error(png_ptr, "Invalid color type");
+      if (compression_type != NULL) /* the remaining outputs are optional */
+         *compression_type = info_ptr->compression_type;
+      if (filter_type != NULL)
+         *filter_type = info_ptr->filter_type;
+      if (interlace_type != NULL)
+         *interlace_type = info_ptr->interlace_type;
+      /* The size checks below run after the outputs have been stored. */
+      /* check for potential overflow of rowbytes */
+      if (*width == 0 || *width > PNG_UINT_31_MAX)
+        png_error(png_ptr, "Invalid image width");
+      if (*height == 0 || *height > PNG_UINT_31_MAX)
+        png_error(png_ptr, "Invalid image height");
+      if (info_ptr->width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      {
+         png_warning(png_ptr,
+            "Width too large for libpng to process image data.");
+      }
+      return (1); /* an over-wide image only warns; it is still reported */
+   }
+   return (0);
+}
+
+#if defined(PNG_oFFs_SUPPORTED)
+/* Fetch the oFFs offsets and unit; all arguments are required, else 0. */
+png_uint_32 PNGAPI
+png_get_oFFs(png_structp png_ptr, png_infop info_ptr,
+   png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)
+{
+   if (png_ptr == NULL || info_ptr == NULL || offset_x == NULL ||
+      offset_y == NULL || unit_type == NULL ||
+      !(info_ptr->valid & PNG_INFO_oFFs))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "oFFs");
+   *offset_x = info_ptr->x_offset;
+   *offset_y = info_ptr->y_offset;
+   *unit_type = (int)info_ptr->offset_unit_type;
+   return (PNG_INFO_oFFs);
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* Fetch the pCAL fields; the pointer outputs alias storage owned by
+ * info_ptr.  Every argument is required.  Returns PNG_INFO_pCAL or 0. */
+png_uint_32 PNGAPI
+png_get_pCAL(png_structp png_ptr, png_infop info_ptr,
+   png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams,
+   png_charp *units, png_charpp *params)
+{
+   if (png_ptr == NULL || info_ptr == NULL || purpose == NULL ||
+      X0 == NULL || X1 == NULL || type == NULL || nparams == NULL ||
+      units == NULL || params == NULL || !(info_ptr->valid & PNG_INFO_pCAL))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "pCAL");
+   *purpose = info_ptr->pcal_purpose;
+   *X0 = info_ptr->pcal_X0;
+   *X1 = info_ptr->pcal_X1;
+   *type = (int)info_ptr->pcal_type;
+   *nparams = (int)info_ptr->pcal_nparams;
+   *units = info_ptr->pcal_units;
+   *params = info_ptr->pcal_params;
+   return (PNG_INFO_pCAL);
+}
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Fetch the sCAL unit and pixel width/height as doubles.  Each output is
+ * written only when its pointer is non-NULL.  Returns PNG_INFO_sCAL or 0. */
+png_uint_32 PNGAPI
+png_get_sCAL(png_structp png_ptr, png_infop info_ptr,
+             int *unit, double *width, double *height)
+{
+    if (png_ptr == NULL || info_ptr == NULL ||
+       !(info_ptr->valid & PNG_INFO_sCAL))
+        return(0);
+    if (unit != NULL) *unit = info_ptr->scal_unit;
+    if (width != NULL) *width = info_ptr->scal_pixel_width;
+    if (height != NULL) *height = info_ptr->scal_pixel_height;
+    return (PNG_INFO_sCAL);
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Fetch the sCAL unit and the width/height strings held by info_ptr.  Each
+ * output is written only when its pointer is non-NULL.  Returns PNG_INFO_sCAL or 0. */
+png_uint_32 PNGAPI
+png_get_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int *unit, png_charpp width, png_charpp height)
+{
+    if (png_ptr == NULL || info_ptr == NULL ||
+       !(info_ptr->valid & PNG_INFO_sCAL))
+        return(0);
+    if (unit != NULL) *unit = info_ptr->scal_unit;
+    if (width != NULL) *width = info_ptr->scal_s_width;
+    if (height != NULL) *height = info_ptr->scal_s_height;
+    return (PNG_INFO_sCAL);
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+/* Fetch the raw pHYs values from info_ptr without unit conversion.  Every
+ * output pointer is optional.  Returns PNG_INFO_pHYs if at least one output
+ * was stored; returns 0 if the chunk is not valid, a handle is NULL, or no
+ * output pointer was supplied. */
+png_uint_32 PNGAPI
+png_get_pHYs(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 found = 0;
+
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_pHYs))
+      return (found);
+
+   png_debug1(1, "in %s retrieval function\n", "pHYs");
+
+   if (res_x != NULL)
+      *res_x = info_ptr->x_pixels_per_unit;
+   if (res_y != NULL)
+      *res_y = info_ptr->y_pixels_per_unit;
+   if (unit_type != NULL)
+      *unit_type = (int)info_ptr->phys_unit_type;
+
+   /* Any single stored value is enough to report the chunk. */
+   if (res_x != NULL || res_y != NULL || unit_type != NULL)
+      found |= PNG_INFO_pHYs;
+   return (found);
+}
+#endif
+
+/* Point *palette at the palette held by info_ptr; num_palette is optional
+ * and written only when non-NULL.  Returns PNG_INFO_PLTE or 0. */
+png_uint_32 PNGAPI
+png_get_PLTE(png_structp png_ptr, png_infop info_ptr, png_colorp *palette,
+   int *num_palette)
+{
+   if (png_ptr == NULL || info_ptr == NULL || palette == NULL ||
+      !(info_ptr->valid & PNG_INFO_PLTE))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "PLTE");
+   *palette = info_ptr->palette;
+   if (num_palette != NULL) *num_palette = info_ptr->num_palette;
+   png_debug1(3, "num_palette = %d\n", (int)info_ptr->num_palette);
+   return (PNG_INFO_PLTE);
+}
+
+#if defined(PNG_sBIT_SUPPORTED)
+/* Point *sig_bit at the sBIT record inside info_ptr.  Returns PNG_INFO_sBIT,
+ * or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_sBIT(png_structp png_ptr, png_infop info_ptr, png_color_8p *sig_bit)
+{
+   if (png_ptr == NULL || info_ptr == NULL || sig_bit == NULL ||
+      !(info_ptr->valid & PNG_INFO_sBIT))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "sBIT");
+   *sig_bit = &(info_ptr->sig_bit);
+   return (PNG_INFO_sBIT);
+}
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* Hand out the text array held by info_ptr and its length; both outputs are
+ * optional.  Returns the count, or 0 (with *num_text zeroed) if none. */
+png_uint_32 PNGAPI
+png_get_text(png_structp png_ptr, png_infop info_ptr, png_textp *text_ptr,
+   int *num_text)
+{
+   if (png_ptr == NULL || info_ptr == NULL || info_ptr->num_text <= 0)
+   {
+      if (num_text != NULL)
+        *num_text = 0;
+      return(0);
+   }
+   png_debug1(1, "in %s retrieval function\n",
+      (png_ptr->chunk_name[0] == '\0' ? "text"
+          : (png_const_charp)png_ptr->chunk_name));
+   if (text_ptr != NULL) *text_ptr = info_ptr->text;
+   if (num_text != NULL) *num_text = info_ptr->num_text;
+   return ((png_uint_32)info_ptr->num_text);
+}
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+/* Point *mod_time at the tIME record inside info_ptr.  Returns
+ * PNG_INFO_tIME, or 0 when the chunk is not valid or an argument is NULL. */
+png_uint_32 PNGAPI
+png_get_tIME(png_structp png_ptr, png_infop info_ptr, png_timep *mod_time)
+{
+   if (png_ptr == NULL || info_ptr == NULL || mod_time == NULL ||
+       !(info_ptr->valid & PNG_INFO_tIME))
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "tIME");
+   *mod_time = &(info_ptr->mod_time);
+   return (PNG_INFO_tIME);
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* Retrieve tRNS data; all outputs are optional.  Returns PNG_INFO_tRNS if
+ * num_trans, or the output matching the colour type, was stored; else 0. */
+png_uint_32 PNGAPI
+png_get_tRNS(png_structp png_ptr, png_infop info_ptr,
+   png_bytep *trans, int *num_trans, png_color_16p *trans_values)
+{
+   png_uint_32 found = 0;
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_tRNS))
+      return (found);
+   png_debug1(1, "in %s retrieval function\n", "tRNS");
+   if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      if (trans_values != NULL)
+      {
+         *trans_values = &(info_ptr->trans_values);
+         found |= PNG_INFO_tRNS;
+      }
+      if (trans != NULL) *trans = NULL;
+   }
+   else
+   {
+      if (trans != NULL)
+      {
+         *trans = info_ptr->trans;
+         found |= PNG_INFO_tRNS;
+      }
+      if (trans_values != NULL) *trans_values = &(info_ptr->trans_values);
+   }
+   if (num_trans != NULL)
+   {
+      *num_trans = info_ptr->num_trans;
+      found |= PNG_INFO_tRNS;
+   }
+   return (found);
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* Point *unknowns at the unknown-chunk array held by info_ptr and return its
+ * entry count; returns 0 (writing nothing) if any argument is NULL. */
+png_uint_32 PNGAPI
+png_get_unknown_chunks(png_structp png_ptr, png_infop info_ptr,
+             png_unknown_chunkpp unknowns)
+{
+   if (png_ptr == NULL || info_ptr == NULL || unknowns == NULL)
+      return (0);
+   *unknowns = info_ptr->unknown_chunks;
+   return ((png_uint_32)info_ptr->unknown_chunks_num);
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+/* rgb_to_gray status byte kept in png_ptr, or 0 when png_ptr is NULL. */
+png_byte PNGAPI png_get_rgb_to_gray_status (png_structp png_ptr)
+{
+   return (png_byte)(png_ptr != NULL ? png_ptr->rgb_to_gray_status : 0);
+}
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+/* User chunk pointer stored in png_ptr, or NULL when png_ptr is NULL. */
+png_voidp PNGAPI png_get_user_chunk_ptr(png_structp png_ptr)
+{
+   return (png_ptr != NULL ? png_ptr->user_chunk_ptr : NULL);
+}
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+/* zbuf_size stored in png_ptr, or 0 when png_ptr is NULL. */
+png_uint_32 PNGAPI png_get_compression_buffer_size(png_structp png_ptr)
+{
+   return (png_uint_32)(png_ptr != NULL ? png_ptr->zbuf_size : 0L);
+}
+#endif
+
+#ifdef PNG_ASSEMBLER_CODE_SUPPORTED
+#ifndef PNG_1_0_X
+/* Added in libpng 1.2.0 and expected to exist by default.  Obsolete (to be
+ * removed from libpng-1.4.0): always returns 0. */
+png_uint_32 PNGAPI
+png_get_asm_flags (png_structp png_ptr)
+{
+    png_ptr = png_ptr;   /* mention the parameter; its value is not used */
+    return 0L;
+}
+
+/* Added in libpng 1.2.0 and expected to exist by default.
+ * Obsolete (to be removed from libpng-1.4.0): the returned mask is always 0
+ * whatever flag_select is. */
+png_uint_32 PNGAPI
+png_get_asm_flagmask (int flag_select)
+{
+    return (flag_select? 0L: 0L);   /* mentions flag_select; result is 0 */
+}
+
+    /* GRR:  could add this:   && defined(PNG_MMX_CODE_SUPPORTED) */
+/* Added in libpng 1.2.0.  Obsolete (to be removed from libpng-1.4.0):
+ * stores -1 in *compilerID (unknown, i.e., no asm/MMX code compiled) and
+ * returns an empty mask whatever flag_select is. */
+png_uint_32 PNGAPI
+png_get_mmx_flagmask (int flag_select, int *compilerID)
+{
+    *compilerID = -1;
+    return (flag_select? 0L: 0L);   /* mentions flag_select; result is 0 */
+}
+
+/* Added in libpng 1.2.0.  Obsolete (to be removed from libpng-1.4.0):
+ * always returns 0. */
+png_byte PNGAPI
+png_get_mmx_bitdepth_threshold (png_structp png_ptr)
+{
+    png_ptr = png_ptr;   /* mention the parameter; its value is not used */
+    return 0;
+}
+
+/* Added in libpng 1.2.0.  Obsolete (to be removed from libpng-1.4.0):
+ * always returns 0. */
+png_uint_32 PNGAPI
+png_get_mmx_rowbytes_threshold (png_structp png_ptr)
+{
+    png_ptr = png_ptr;   /* mention the parameter; its value is not used */
+    return 0L;
+}
+#endif /* ?PNG_1_0_X */
+#endif /* ?PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* these functions were added to libpng 1.2.6 */
+/* user_width_max stored in png_ptr, or 0 when png_ptr is NULL. */
+png_uint_32 PNGAPI png_get_user_width_max (png_structp png_ptr)
+{
+    return (png_ptr != NULL ? png_ptr->user_width_max : 0);
+}
+/* user_height_max stored in png_ptr, or 0 when png_ptr is NULL. */
+png_uint_32 PNGAPI png_get_user_height_max (png_structp png_ptr)
+{
+    return (png_ptr != NULL ? png_ptr->user_height_max : 0);
+}
+#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
+ 
+
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngmem.c b/PNG/pngmem.c
new file mode 100644
index 0000000..13cc60c
--- /dev/null
+++ b/PNG/pngmem.c
@@ -0,0 +1,608 @@
+
+/* pngmem.c - stub functions for memory allocation
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all memory allocation.  Users who
+ * need special memory handling are expected to supply replacement
+ * functions for png_malloc() and png_free(), and to use
+ * png_create_read_struct_2() and png_create_write_struct_2() to
+ * identify the replacement functions.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+/* Borland DOS special memory handler */
+#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
+/* if you change this, be sure to change the one in png.h also */
+
+/* Allocate and zero a png_struct or png_info, selected by type.  The malloc
+   and memset could be replaced by one calloc() call if that performs better. */
+png_voidp /* PRIVATE */
+png_create_struct(int type)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_struct_2(type, png_malloc_ptr_NULL, png_voidp_NULL));
+}
+
+/* Alternate version of png_create_struct, for use with user-defined malloc. */
+png_voidp /* PRIVATE */
+png_create_struct_2(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_size_t size;
+   png_voidp struct_ptr;
+   /* Without PNG_USER_MEM_SUPPORTED this body belongs to png_create_struct. */
+   if (type == PNG_STRUCT_INFO)
+     size = png_sizeof(png_info);
+   else if (type == PNG_STRUCT_PNG)
+     size = png_sizeof(png_struct);
+   else
+     return (png_get_copyright(NULL)); /* NOTE(review): not NULL - confirm */
+   /* Use the caller's allocator when one is given, otherwise farmalloc(). */
+#ifdef PNG_USER_MEM_SUPPORTED
+   if(malloc_fn != NULL)
+   {
+      png_struct dummy_struct;   /* stack stand-in; only mem_ptr is set */
+      png_structp png_ptr = &dummy_struct;
+      png_ptr->mem_ptr=mem_ptr;
+      struct_ptr = (*(malloc_fn))(png_ptr, (png_uint_32)size);
+   }
+   else
+#endif /* PNG_USER_MEM_SUPPORTED */
+      struct_ptr = (png_voidp)farmalloc(size);
+   if (struct_ptr != NULL)   /* zero the new structure on success */
+      png_memset(struct_ptr, 0, size);
+   return (struct_ptr);
+}
+
+/* Free memory allocated by a png_create_struct() call */
+void /* PRIVATE */
+png_destroy_struct(png_voidp struct_ptr)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2(struct_ptr, png_free_ptr_NULL, png_voidp_NULL);
+}
+
+/* Free a struct via free_fn when it is non-NULL, otherwise via farfree() */
+void /* PRIVATE */
+png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
+    png_voidp mem_ptr)
+{
+#endif
+   if (struct_ptr != NULL)   /* a NULL struct_ptr is ignored */
+   {
+#ifdef PNG_USER_MEM_SUPPORTED
+      if(free_fn != NULL)
+      {
+         png_struct dummy_struct;   /* stack stand-in; only mem_ptr is set */
+         png_structp png_ptr = &dummy_struct;
+         png_ptr->mem_ptr=mem_ptr;
+         (*(free_fn))(png_ptr, struct_ptr);
+         return;
+      }
+#endif /* PNG_USER_MEM_SUPPORTED */
+      farfree (struct_ptr);
+   }
+}
+
+/* Allocate memory.  For reasonable files, size should never exceed
+ * 64K.  However, zlib may allocate more than 64K if you don't tell
+ * it not to.  See zconf.h and png.h for more information. zlib does
+ * need to allocate exactly 64K, so whatever you call here must
+ * have the ability to do that.
+ *
+ * Borland seems to have a problem in DOS mode for exactly 64K.
+ * It gives you a segment with an offset of 8 (perhaps to store its
+ * memory stuff).  zlib doesn't like this at all, so we have to
+ * detect and deal with it.  This code should not be needed in
+ * Windows or OS/2 modes, and only in 16 bit mode.  This code has
+ * been updated by Alexander Lehmann for version 0.89 to waste less
+ * memory.
+ *
+ * Note that we can't use png_size_t for the "size" declaration,
+ * since on some systems a png_size_t is a 16-bit quantity, and as a
+ * result, we would be truncating potentially larger memory requests
+ * (which should cause a fatal error) and introducing major problems.
+ */
+
+png_voidp PNGAPI   /* returns NULL for a NULL png_ptr or a zero size */
+png_malloc(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+   /* With user memory hooks, png_malloc only dispatches and checks for NULL. */
+#ifdef PNG_USER_MEM_SUPPORTED
+   if(png_ptr->malloc_fn != NULL)
+       ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size));
+   else
+       ret = (png_malloc_default(png_ptr, size));
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+       png_error(png_ptr, "Out of memory!");
+   return (ret);
+}
+/* Default allocator.  Without PNG_USER_MEM_SUPPORTED this body is png_malloc. */
+png_voidp PNGAPI
+png_malloc_default(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (size > (png_uint_32)65536L)
+   {
+      png_warning(png_ptr, "Cannot Allocate > 64K");
+      ret = NULL;
+   }
+   else
+#endif
+   /* Reject sizes that do not survive conversion to size_t. */
+   if (size != (size_t)size)
+     ret = NULL;
+   else if (size == (png_uint_32)65536L)   /* exactly 64K: special handling */
+   {
+      if (png_ptr->offset_table == NULL)
+      {
+         /* Try a plain farmalloc first; the table is built only if needed */
+         ret = farmalloc(size);
+         if (ret == NULL || ((png_size_t)ret & 0xffff)) /* NULL or low 16 bits set */
+         {
+            int num_blocks;
+            png_uint_32 total_size;
+            png_bytep table;
+            int i;
+            png_byte huge * hptr;
+
+            if (ret != NULL)
+            {
+               farfree(ret);
+               ret = NULL;
+            }
+            /* Block count is derived from the zlib window bits and mem level. */
+            if(png_ptr->zlib_window_bits > 14)
+               num_blocks = (int)(1 << (png_ptr->zlib_window_bits - 14));
+            else
+               num_blocks = 1;
+            if (png_ptr->zlib_mem_level >= 7)
+               num_blocks += (int)(1 << (png_ptr->zlib_mem_level - 7));
+            else
+               num_blocks++;
+
+            total_size = ((png_uint_32)65536L) * (png_uint_32)num_blocks+16;
+
+            table = farmalloc(total_size);
+
+            if (table == NULL)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr, "Out Of Memory."); /* Note "O" and "M" */
+               else
+                  png_warning(png_ptr, "Out Of Memory.");
+#endif
+               return (NULL);
+            }
+
+            if ((png_size_t)table & 0xfff0)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr,
+                    "Farmalloc didn't return normalized pointer");
+               else
+                  png_warning(png_ptr,
+                    "Farmalloc didn't return normalized pointer");
+#endif
+               return (NULL);
+            }
+
+            png_ptr->offset_table = table;
+            png_ptr->offset_table_ptr = farmalloc(num_blocks *
+               png_sizeof (png_bytep));
+
+            if (png_ptr->offset_table_ptr == NULL)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr, "Out Of memory."); /* Note "O" and "M" */
+               else
+                  png_warning(png_ptr, "Out Of memory.");
+#endif
+               return (NULL);
+            }
+            /* Round hptr up to a multiple of 16, then record 64K-spaced slots. */
+            hptr = (png_byte huge *)table;
+            if ((png_size_t)hptr & 0xf)
+            {
+               hptr = (png_byte huge *)((long)(hptr) & 0xfffffff0L);
+               hptr = hptr + 16L;  /* "hptr += 16L" fails on Turbo C++ 3.0 */
+            }
+            for (i = 0; i < num_blocks; i++)
+            {
+               png_ptr->offset_table_ptr[i] = (png_bytep)hptr;
+               hptr = hptr + (png_uint_32)65536L;  /* "+=" fails on TC++3.0 */
+            }
+
+            png_ptr->offset_table_number = num_blocks;
+            png_ptr->offset_table_count = 0;
+            png_ptr->offset_table_count_free = 0;
+         }
+      }
+      /* Every table slot has been handed out: no 64K block is available. */
+      if (png_ptr->offset_table_count >= png_ptr->offset_table_number)
+      {
+#ifndef PNG_USER_MEM_SUPPORTED
+         if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+            png_error(png_ptr, "Out of Memory."); /* Note "o" and "M" */
+         else
+            png_warning(png_ptr, "Out of Memory.");
+#endif
+         return (NULL);
+      }
+
+      ret = png_ptr->offset_table_ptr[png_ptr->offset_table_count++];
+   }
+   else
+      ret = farmalloc(size);
+
+#ifndef PNG_USER_MEM_SUPPORTED
+   if (ret == NULL)
+   {
+      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+         png_error(png_ptr, "Out of memory."); /* Note "o" and "m" */
+      else
+         png_warning(png_ptr, "Out of memory."); /* Note "o" and "m" */
+   }
+#endif
+
+   return (ret);
+}
+
+/* free a pointer allocated by png_malloc().  In the default
+   configuration, png_ptr is not used, but is passed in case it
+   is needed.  If ptr is NULL, return without taking any action. */
+void PNGAPI
+png_free(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+   /* Prefer the user-supplied free function when one is installed. */
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr->free_fn != NULL)
+   {
+      (*(png_ptr->free_fn))(png_ptr, ptr);
+      return;
+   }
+   else png_free_default(png_ptr, ptr);
+}
+/* Default free.  Without PNG_USER_MEM_SUPPORTED this body is png_free. */
+void PNGAPI
+png_free_default(png_structp png_ptr, png_voidp ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if(png_ptr == NULL || ptr == NULL) return;
+   /* Offset-table slots are only counted as freed, not passed to farfree(). */
+   if (png_ptr->offset_table != NULL)
+   {
+      int i;
+
+      for (i = 0; i < png_ptr->offset_table_count; i++)
+      {
+         if (ptr == png_ptr->offset_table_ptr[i])
+         {
+            ptr = NULL;   /* handled here, so the farfree(ptr) below is skipped */
+            png_ptr->offset_table_count_free++;
+            break;
+         }
+      }
+      if (png_ptr->offset_table_count_free == png_ptr->offset_table_count)
+      {  /* every slot handed out has been returned: release the table */
+         farfree(png_ptr->offset_table);
+         farfree(png_ptr->offset_table_ptr);
+         png_ptr->offset_table = NULL;
+         png_ptr->offset_table_ptr = NULL;
+      }
+   }
+   /* Anything that was not a table slot is an ordinary farmalloc block. */
+   if (ptr != NULL)
+   {
+      farfree(ptr);
+   }
+}
+
+#else /* Not the Borland DOS special memory handler */
+
+/* Allocate memory for a png_struct or a png_info.  The malloc and
+   memset can be replaced by a single call to calloc() if this is thought
+   to improve performance noticeably. */
+png_voidp /* PRIVATE */
+png_create_struct(int type)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_struct_2(type, png_malloc_ptr_NULL, png_voidp_NULL));
+}
+
+/* Allocate memory for a png_struct or a png_info.  The malloc and
+   memset can be replaced by a single call to calloc() if this is thought
+   to improve performance noticeably. */
+png_voidp /* PRIVATE */
+png_create_struct_2(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_size_t size;
+   png_voidp struct_ptr;
+   /* Without PNG_USER_MEM_SUPPORTED this body belongs to png_create_struct. */
+   if (type == PNG_STRUCT_INFO)
+      size = png_sizeof(png_info);
+   else if (type == PNG_STRUCT_PNG)
+      size = png_sizeof(png_struct);
+   else
+      return (NULL);   /* unrecognized struct type */
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if(malloc_fn != NULL)
+   {
+      png_struct dummy_struct;   /* stack stand-in; only mem_ptr is set */
+      png_structp png_ptr = &dummy_struct;
+      png_ptr->mem_ptr=mem_ptr;
+      struct_ptr = (*(malloc_fn))(png_ptr, size);
+      if (struct_ptr != NULL)
+         png_memset(struct_ptr, 0, size);
+      return (struct_ptr);
+   }
+#endif /* PNG_USER_MEM_SUPPORTED */
+   /* No user allocator: pick the allocation call for this compiler. */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+   struct_ptr = (png_voidp)farmalloc(size);
+#else
+# if defined(_MSC_VER) && defined(MAXSEG_64K)
+   struct_ptr = (png_voidp)halloc(size,1);
+# else
+   struct_ptr = (png_voidp)malloc(size);
+# endif
+#endif
+   if (struct_ptr != NULL)   /* zero the new structure on success */
+      png_memset(struct_ptr, 0, size);
+
+   return (struct_ptr);
+}
+
+
+/* Free memory allocated by a png_create_struct() call */
+void /* PRIVATE */
+png_destroy_struct(png_voidp struct_ptr)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2(struct_ptr, png_free_ptr_NULL, png_voidp_NULL);
+}
+
+/* Free a struct via free_fn when it is non-NULL, else the platform free */
+void /* PRIVATE */
+png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
+    png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   if (struct_ptr != NULL)   /* a NULL struct_ptr is ignored */
+   {
+#ifdef PNG_USER_MEM_SUPPORTED
+      if(free_fn != NULL)
+      {
+         png_struct dummy_struct;   /* stack stand-in; only mem_ptr is set */
+         png_structp png_ptr = &dummy_struct;
+         png_ptr->mem_ptr=mem_ptr;
+         (*(free_fn))(png_ptr, struct_ptr);
+         return;
+      }
+#endif /* PNG_USER_MEM_SUPPORTED */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+      farfree(struct_ptr);
+#else
+# if defined(_MSC_VER) && defined(MAXSEG_64K)
+      hfree(struct_ptr);
+# else
+      free(struct_ptr);
+# endif
+#endif
+   }
+}
+
+/* Allocate memory.  For reasonable files, size should never exceed
+   64K.  However, zlib may allocate more than 64K if you don't tell
+   it not to.  See zconf.h and png.h for more information.  zlib does
+   need to allocate exactly 64K, so whatever you call here must
+   have the ability to do that. */
+
+png_voidp PNGAPI   /* returns NULL for a NULL png_ptr or a zero size */
+png_malloc(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+   /* Use the user hook if one is installed, else the default allocator. */
+   if(png_ptr->malloc_fn != NULL)
+       ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size));
+   else
+       ret = (png_malloc_default(png_ptr, size));
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+       png_error(png_ptr, "Out of Memory!");
+   return (ret);
+}
+/* Default allocator.  Without PNG_USER_MEM_SUPPORTED this body is png_malloc. */
+png_voidp PNGAPI
+png_malloc_default(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (size > (png_uint_32)65536L)
+   {
+#ifndef PNG_USER_MEM_SUPPORTED
+      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+         png_error(png_ptr, "Cannot Allocate > 64K");
+      else
+#endif
+         return NULL;   /* request exceeds the 64K limit of this build */
+   }
+#endif
+
+ /* Check for overflow: size must survive conversion to the allocator's type */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+ if (size != (unsigned long)size)
+   ret = NULL;
+ else
+   ret = farmalloc(size);
+#else
+# if defined(_MSC_VER) && defined(MAXSEG_64K)
+ if (size != (unsigned long)size)
+   ret = NULL;
+ else
+   ret = halloc(size, 1);
+# else
+ if (size != (size_t)size)
+   ret = NULL;
+ else
+   ret = malloc((size_t)size);
+# endif
+#endif
+   /* With user hooks compiled in, the NULL check is left to png_malloc(). */
+#ifndef PNG_USER_MEM_SUPPORTED
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+      png_error(png_ptr, "Out of Memory");
+#endif
+
+   return (ret);
+}
+
+/* Free a pointer allocated by png_malloc().  If ptr is NULL, return
+   without taking any action. */
+void PNGAPI
+png_free(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+   /* Prefer the user-supplied free function when one is installed. */
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr->free_fn != NULL)
+   {
+      (*(png_ptr->free_fn))(png_ptr, ptr);
+      return;
+   }
+   else png_free_default(png_ptr, ptr);
+}
+void PNGAPI   /* without PNG_USER_MEM_SUPPORTED the body below is png_free */
+png_free_default(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+
+#endif /* PNG_USER_MEM_SUPPORTED */
+   /* Release with the call that matches the allocator chosen per compiler. */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+   farfree(ptr);
+#else
+# if defined(_MSC_VER) && defined(MAXSEG_64K)
+   hfree(ptr);
+# else
+   free(ptr);
+# endif
+#endif
+}
+
+#endif /* Not Borland DOS special memory handler */
+
+#if defined(PNG_1_0_X)
+#  define png_malloc_warn png_malloc
+#else
+/* This function was added at libpng version 1.2.3.  The png_malloc_warn()
+ * function will set up png_malloc() to issue a png_warning and return NULL
+ * instead of issuing a png_error, if it fails to allocate the requested
+ * memory.
+ */
+png_voidp PNGAPI
+png_malloc_warn(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp result;
+   png_uint_32 original_flags;
+   if (png_ptr == NULL)
+      return (NULL);
+   original_flags = png_ptr->flags;   /* put back after the call */
+   png_ptr->flags = original_flags | PNG_FLAG_MALLOC_NULL_MEM_OK;
+   result = (png_voidp)png_malloc((png_structp)png_ptr, size);
+   png_ptr->flags = original_flags;
+   return (result);
+}
+#endif
+
+/* png_memcpy() with a check that length survives narrowing to png_size_t. */
+png_voidp PNGAPI
+png_memcpy_check (png_structp png_ptr, png_voidp s1, png_voidp s2,
+   png_uint_32 length)
+{
+   png_size_t nbytes = (png_size_t)length;
+
+   /* A mismatch means png_size_t cannot represent length. */
+   if (length != (png_uint_32)nbytes)
+      png_error(png_ptr,"Overflow in png_memcpy_check.");
+   return (png_memcpy (s1, s2, nbytes));
+}
+
+/* png_memset() with a check that length survives narrowing to png_size_t. */
+png_voidp PNGAPI
+png_memset_check (png_structp png_ptr, png_voidp s1, int value,
+   png_uint_32 length)
+{
+   png_size_t nbytes = (png_size_t)length;
+
+   /* A mismatch means png_size_t cannot represent length. */
+   if (length != (png_uint_32)nbytes)
+      png_error(png_ptr,"Overflow in png_memset_check.");
+
+   return (png_memset (s1, value, nbytes));
+}
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* This function is called when the application wants to use another method
+ * of allocating and freeing memory.
+ */
+void PNGAPI
+png_set_mem_fn(png_structp png_ptr, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)
+{
+   if (png_ptr == NULL)
+      return;                     /* no structure to configure */
+   png_ptr->free_fn = free_fn;
+   png_ptr->malloc_fn = malloc_fn;
+   png_ptr->mem_ptr = mem_ptr;
+}
+
+/* This function returns a pointer to the mem_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy and png_read_destroy are called.
+ */
+png_voidp PNGAPI
+png_get_mem_ptr(png_structp png_ptr)
+{
+   /* A NULL handle yields NULL; otherwise hand back the stored mem_ptr. */
+   return (png_ptr != NULL ? (png_voidp)png_ptr->mem_ptr : NULL);
+}
+#endif /* PNG_USER_MEM_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngpread.c b/PNG/pngpread.c
new file mode 100644
index 0000000..aa7151c
--- /dev/null
+++ b/PNG/pngpread.c
@@ -0,0 +1,1598 @@
+
+/* pngpread.c - read a png file in push mode
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+
+/* push model modes */
+#define PNG_READ_SIG_MODE   0
+#define PNG_READ_CHUNK_MODE 1
+#define PNG_READ_IDAT_MODE  2
+#define PNG_SKIP_MODE       3
+#define PNG_READ_tEXt_MODE  4
+#define PNG_READ_zTXt_MODE  5
+#define PNG_READ_DONE_MODE  6
+#define PNG_READ_iTXt_MODE  7
+#define PNG_ERROR_MODE      8
+
+void PNGAPI
+png_process_data(png_structp png_ptr, png_infop info_ptr,
+   png_bytep buffer, png_size_t buffer_size)
+{  /* Push-mode entry point: feed buffer_size bytes at buffer to the reader. */
+   if(png_ptr == NULL || info_ptr == NULL) return;
+   png_push_restore_buffer(png_ptr, buffer, buffer_size);
+   /* Run mode handlers until png_ptr->buffer_size has dropped to zero. */
+   while (png_ptr->buffer_size)
+   {
+      png_process_some_data(png_ptr, info_ptr);
+   }
+}
+
+/* Dispatch on png_ptr->process_mode, i.e. on whatever the reader was doing
+ * when the previous input ran out, and run that mode's handler once.
+ */
+void /* PRIVATE */
+png_process_some_data(png_structp png_ptr, png_infop info_ptr)
+{
+   if(png_ptr == NULL) return;
+   switch (png_ptr->process_mode)
+   {
+      case PNG_READ_SIG_MODE:
+      {
+         png_push_read_sig(png_ptr, info_ptr);
+         break;
+      }
+      case PNG_READ_CHUNK_MODE:
+      {
+         png_push_read_chunk(png_ptr, info_ptr);
+         break;
+      }
+      case PNG_READ_IDAT_MODE:
+      {
+         png_push_read_IDAT(png_ptr);
+         break;
+      }
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      case PNG_READ_tEXt_MODE:
+      {
+         png_push_read_tEXt(png_ptr, info_ptr);
+         break;
+      }
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      case PNG_READ_zTXt_MODE:
+      {
+         png_push_read_zTXt(png_ptr, info_ptr);
+         break;
+      }
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      case PNG_READ_iTXt_MODE:
+      {
+         png_push_read_iTXt(png_ptr, info_ptr);
+         break;
+      }
+#endif
+      case PNG_SKIP_MODE:
+      {
+         png_push_crc_finish(png_ptr);
+         break;
+      }
+      default: /* any other mode: drop the pending input */
+      {
+         png_ptr->buffer_size = 0;
+         break;
+      }
+   }
+}
+
+/* Read the signature bytes still outstanding (8 - png_ptr->sig_bytes, capped
+ * at the available input) into info_ptr->signature and check them with
+ * png_sig_cmp.  sig_bytes may already be nonzero because the calling
+ * application checked some bytes itself or because earlier calls consumed
+ * part of the signature.  Enters PNG_READ_CHUNK_MODE once sig_bytes >= 8.
+ */
+void /* PRIVATE */
+png_push_read_sig(png_structp png_ptr, png_infop info_ptr)
+{
+   png_size_t num_checked = png_ptr->sig_bytes,
+             num_to_check = 8 - num_checked;
+
+   if (png_ptr->buffer_size < num_to_check)
+   {
+      num_to_check = png_ptr->buffer_size;
+   }
+
+   png_push_fill_buffer(png_ptr, &(info_ptr->signature[num_checked]),
+      num_to_check);
+   png_ptr->sig_bytes = (png_byte)(png_ptr->sig_bytes+num_to_check);
+   /* A mismatch always ends in png_error; the inner test picks the text. */
+   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check))
+   {
+      if (num_checked < 4 &&
+          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4))
+         png_error(png_ptr, "Not a PNG file");
+      else
+         png_error(png_ptr, "PNG file corrupted by ASCII conversion");
+   }
+   else
+   {
+      if (png_ptr->sig_bytes >= 8)
+      {
+         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+      }
+   }
+}
+
+void /* PRIVATE */
+png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
+{  /* Parse a chunk header if none is pending, then dispatch on its name. */
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+   /* A chunk header is 8 bytes: a 4 byte length followed by a 4 byte
+    * chunk name.  If fewer than 8 bytes are buffered, save them and wait
+    * for more input.  Every branch below (except IDAT, which is streamed
+    * separately) likewise waits until push_length + 4 bytes -- the chunk
+    * data plus its 4 byte CRC -- are buffered before calling its handler.
+    */
+   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   {
+      png_byte chunk_length[4];
+
+      if (png_ptr->buffer_size < 8)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_fill_buffer(png_ptr, chunk_length, 4);
+      png_ptr->push_length = png_get_uint_31(png_ptr,chunk_length);
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
+   }
+
+   if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+     if(png_ptr->mode & PNG_AFTER_IDAT)
+        png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+   if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         if (png_ptr->push_length != 13)
+            png_error(png_ptr, "Invalid IHDR length");
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_IHDR(png_ptr, info_ptr, png_ptr->push_length);
+   }
+   else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_IEND(png_ptr, info_ptr, png_ptr->push_length);
+
+      png_ptr->process_mode = PNG_READ_DONE_MODE;
+      png_push_have_end(png_ptr, info_ptr);
+   }
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         png_ptr->mode |= PNG_HAVE_IDAT;
+      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+      if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_ptr->mode |= PNG_HAVE_PLTE;
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         if (!(png_ptr->mode & PNG_HAVE_IHDR))
+            png_error(png_ptr, "Missing IHDR before IDAT");
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                  !(png_ptr->mode & PNG_HAVE_PLTE))
+            png_error(png_ptr, "Missing PLTE before IDAT");
+      }
+   }
+#endif
+   else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_PLTE(png_ptr, info_ptr, png_ptr->push_length);
+   }
+   else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+   {
+      /* If we reach an IDAT chunk, this means we have read all of the
+       * header chunks, and we can start reading the image (or if this
+       * is called after the image has been read - we have an error).
+       */
+     if (!(png_ptr->mode & PNG_HAVE_IHDR))
+       png_error(png_ptr, "Missing IHDR before IDAT");
+     else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+         !(png_ptr->mode & PNG_HAVE_PLTE))
+       png_error(png_ptr, "Missing PLTE before IDAT");
+
+      if (png_ptr->mode & PNG_HAVE_IDAT)
+      {
+         if (!(png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+           if (png_ptr->push_length == 0)
+              return;
+
+         if (png_ptr->mode & PNG_AFTER_IDAT)
+            png_error(png_ptr, "Too many IDAT's found");
+      }
+
+      png_ptr->idat_size = png_ptr->push_length;
+      png_ptr->mode |= PNG_HAVE_IDAT;
+      png_ptr->process_mode = PNG_READ_IDAT_MODE;
+      png_push_have_info(png_ptr, info_ptr);
+      png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes;
+      png_ptr->zstream.next_out = png_ptr->row_buf;
+      return; /* PNG_HAVE_CHUNK_HEADER stays set for png_push_read_IDAT */
+   }
+#if defined(PNG_READ_gAMA_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_gAMA(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sBIT(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_cHRM(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_tRNS(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_bKGD_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_bKGD(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_hIST(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_pHYs(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_oFFs(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_tIME(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_tEXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_zTXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+   else
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+   }
+   /* Handler was called: the next invocation must parse a fresh header. */
+   png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
+}
+
+void /* PRIVATE */
+png_push_crc_skip(png_structp png_ptr, png_uint_32 skip)
+{  /* Enter PNG_SKIP_MODE; png_push_crc_finish then consumes skip bytes. */
+   png_ptr->process_mode = PNG_SKIP_MODE;
+   png_ptr->skip_length = skip;
+}
+
+void /* PRIVATE */
+png_push_crc_finish(png_structp png_ptr)
+{  /* Consume skip_length bytes, feeding them to the CRC, then finish it. */
+   if (png_ptr->skip_length && png_ptr->save_buffer_size)
+   {
+      png_size_t save_size;
+      /* Saved bytes go first: take min(skip_length, save_buffer_size). */
+      if (png_ptr->skip_length < (png_uint_32)png_ptr->save_buffer_size)
+         save_size = (png_size_t)png_ptr->skip_length;
+      else
+         save_size = png_ptr->save_buffer_size;
+
+      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size);
+
+      png_ptr->skip_length -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->save_buffer_size -= save_size;
+      png_ptr->save_buffer_ptr += save_size;
+   }
+   if (png_ptr->skip_length && png_ptr->current_buffer_size)
+   {
+      png_size_t save_size;
+      /* Then the caller's buffer: min(skip_length, current_buffer_size). */
+      if (png_ptr->skip_length < (png_uint_32)png_ptr->current_buffer_size)
+         save_size = (png_size_t)png_ptr->skip_length;
+      else
+         save_size = png_ptr->current_buffer_size;
+
+      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size);
+
+      png_ptr->skip_length -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->current_buffer_size -= save_size;
+      png_ptr->current_buffer_ptr += save_size;
+   }
+   if (!png_ptr->skip_length)
+   {
+      if (png_ptr->buffer_size < 4) /* wait until 4 more bytes are buffered */
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_crc_finish(png_ptr, 0);
+      png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+   }
+}
+
+void PNGAPI
+png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
+{  /* Copy up to length buffered bytes into buffer, saved bytes first. */
+   png_bytep ptr;
+
+   if(png_ptr == NULL) return;
+   ptr = buffer;
+   if (png_ptr->save_buffer_size)
+   {
+      png_size_t save_size;
+
+      if (length < png_ptr->save_buffer_size)
+         save_size = length;
+      else
+         save_size = png_ptr->save_buffer_size;
+
+      png_memcpy(ptr, png_ptr->save_buffer_ptr, save_size);
+      length -= save_size;
+      ptr += save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->save_buffer_size -= save_size;
+      png_ptr->save_buffer_ptr += save_size;
+   }
+   if (length && png_ptr->current_buffer_size)
+   {
+      png_size_t save_size;
+      /* If fewer than length bytes remain, only those are copied. */
+      if (length < png_ptr->current_buffer_size)
+         save_size = length;
+      else
+         save_size = png_ptr->current_buffer_size;
+
+      png_memcpy(ptr, png_ptr->current_buffer_ptr, save_size);
+      png_ptr->buffer_size -= save_size;
+      png_ptr->current_buffer_size -= save_size;
+      png_ptr->current_buffer_ptr += save_size;
+   }
+}
+
+void /* PRIVATE */
+png_push_save_buffer(png_structp png_ptr)
+{  /* Stash all unconsumed input in save_buffer and report buffer_size 0. */
+   if (png_ptr->save_buffer_size &&
+       png_ptr->save_buffer_ptr != png_ptr->save_buffer)
+   {
+      /* Slide leftover saved bytes to the front.  The ranges can overlap,
+       * so copy forward byte by byte instead of using png_memcpy. */
+      png_size_t i,istop;
+      png_bytep sp;
+      png_bytep dp;
+      istop = png_ptr->save_buffer_size;
+      for (i = 0, sp = png_ptr->save_buffer_ptr, dp = png_ptr->save_buffer;
+         i < istop; i++, sp++, dp++)
+         *dp = *sp;
+   }
+   if (png_ptr->save_buffer_size + png_ptr->current_buffer_size >
+      png_ptr->save_buffer_max)
+   {
+      png_size_t new_max;
+      png_bytep old_buffer;
+
+      if (png_ptr->save_buffer_size > PNG_SIZE_MAX -
+         (png_ptr->current_buffer_size + 256))
+        png_error(png_ptr, "Potential overflow of save_buffer");
+      new_max = png_ptr->save_buffer_size + png_ptr->current_buffer_size + 256;
+      /* The size is passed as png_uint_32: refuse one the cast would shrink */
+      if ((png_size_t)(png_uint_32)new_max != new_max)
+        png_error(png_ptr, "save_buffer size overflowed in pngpread");
+      old_buffer = png_ptr->save_buffer;
+      png_ptr->save_buffer = (png_bytep)png_malloc(png_ptr,
+         (png_uint_32)new_max);
+      if (png_ptr->save_buffer_size) /* nothing to copy when empty */
+         png_memcpy(png_ptr->save_buffer, old_buffer,
+            png_ptr->save_buffer_size);
+      png_free(png_ptr, old_buffer);
+      png_ptr->save_buffer_max = new_max;
+   }
+   if (png_ptr->current_buffer_size)
+   {
+      png_memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size,
+         png_ptr->current_buffer_ptr, png_ptr->current_buffer_size);
+      png_ptr->save_buffer_size += png_ptr->current_buffer_size;
+      png_ptr->current_buffer_size = 0;
+   }
+   png_ptr->save_buffer_ptr = png_ptr->save_buffer;
+   png_ptr->buffer_size = 0;
+}
+
+void /* PRIVATE */
+png_push_restore_buffer(png_structp png_ptr, png_bytep buffer,
+   png_size_t buffer_length)
+{  /* Make buffer the current input; buffer_size also counts saved bytes. */
+   png_ptr->current_buffer = buffer;
+   png_ptr->current_buffer_size = buffer_length;
+   png_ptr->buffer_size = buffer_length + png_ptr->save_buffer_size;
+   png_ptr->current_buffer_ptr = png_ptr->current_buffer;
+}
+
+void /* PRIVATE */
+png_push_read_IDAT(png_structp png_ptr)
+{  /* Stream IDAT chunk data to png_process_IDAT_data, chunk after chunk. */
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST PNG_IDAT;
+#endif
+   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   {
+      png_byte chunk_length[4];
+
+      if (png_ptr->buffer_size < 8)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_fill_buffer(png_ptr, chunk_length, 4);
+      png_ptr->push_length = png_get_uint_31(png_ptr,chunk_length);
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
+
+      if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4)) /* not IDAT */
+      {
+         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+         if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+            png_error(png_ptr, "Not enough compressed data");
+         return; /* the parsed header is left for png_push_read_chunk */
+      }
+
+      png_ptr->idat_size = png_ptr->push_length;
+   }
+   if (png_ptr->idat_size && png_ptr->save_buffer_size)
+   {
+      png_size_t save_size;
+
+      if (png_ptr->idat_size < (png_uint_32)png_ptr->save_buffer_size)
+      {
+         save_size = (png_size_t)png_ptr->idat_size;
+         /* check for overflow */
+         if((png_uint_32)save_size != png_ptr->idat_size)
+            png_error(png_ptr, "save_size overflowed in pngpread");
+      }
+      else
+         save_size = png_ptr->save_buffer_size;
+      /* The CRC covers every byte, even ones not passed on to zlib. */
+      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size);
+      if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+         png_process_IDAT_data(png_ptr, png_ptr->save_buffer_ptr, save_size);
+      png_ptr->idat_size -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->save_buffer_size -= save_size;
+      png_ptr->save_buffer_ptr += save_size;
+   }
+   if (png_ptr->idat_size && png_ptr->current_buffer_size)
+   {
+      png_size_t save_size;
+
+      if (png_ptr->idat_size < (png_uint_32)png_ptr->current_buffer_size)
+      {
+         save_size = (png_size_t)png_ptr->idat_size;
+         /* check for overflow */
+         if((png_uint_32)save_size != png_ptr->idat_size)
+            png_error(png_ptr, "save_size overflowed in pngpread");
+      }
+      else
+         save_size = png_ptr->current_buffer_size;
+
+      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size);
+      if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+        png_process_IDAT_data(png_ptr, png_ptr->current_buffer_ptr, save_size);
+
+      png_ptr->idat_size -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->current_buffer_size -= save_size;
+      png_ptr->current_buffer_ptr += save_size;
+   }
+   if (!png_ptr->idat_size)
+   {
+      if (png_ptr->buffer_size < 4) /* wait until 4 more bytes are buffered */
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      /* Chunk data is used up: finish it and expect a new header next. */
+      png_crc_finish(png_ptr, 0);
+      png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
+      png_ptr->mode |= PNG_AFTER_IDAT;
+   }
+}
+
+void /* PRIVATE */
+png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
+   png_size_t buffer_length)
+{  /* Inflate buffer_length bytes at buffer, handing on each filled row. */
+   int ret;
+
+   if ((png_ptr->flags & PNG_FLAG_ZLIB_FINISHED) && buffer_length)
+      png_error(png_ptr, "Extra compression data");
+   /* Point zlib at the new input; output keeps going into the row buffer. */
+   png_ptr->zstream.next_in = buffer;
+   png_ptr->zstream.avail_in = (uInt)buffer_length;
+   for(;;)
+   {
+      ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+      if (ret != Z_OK)
+      {
+         if (ret == Z_STREAM_END)
+         {
+            if (png_ptr->zstream.avail_in)
+               png_error(png_ptr, "Extra compressed data");
+            if (!(png_ptr->zstream.avail_out))
+            {
+               png_push_process_row(png_ptr);
+            }
+
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+         else if (ret == Z_BUF_ERROR) /* zlib: no progress was possible */
+            break;
+         else
+            png_error(png_ptr, "Decompression Error");
+      }
+      if (!(png_ptr->zstream.avail_out)) /* the output row is full */
+      {
+         if ((
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+             png_ptr->interlaced && png_ptr->pass > 6) ||
+             (!png_ptr->interlaced &&
+#endif
+             png_ptr->row_number == png_ptr->num_rows))
+         {
+           if (png_ptr->zstream.avail_in)
+             png_warning(png_ptr, "Too much data in IDAT chunks");
+           png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+           break;
+         }
+         png_push_process_row(png_ptr);
+         png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes;
+         png_ptr->zstream.next_out = png_ptr->row_buf;
+      }
+      else /* row not complete yet: stop until more input arrives */
+         break;
+   }
+}
+
+void /* PRIVATE */
+png_push_process_row(png_structp png_ptr)
+{  /* Unfilter and transform the row in row_buf, then report it. */
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->iwidth;
+   png_ptr->row_info.channels = png_ptr->channels;
+   png_ptr->row_info.bit_depth = png_ptr->bit_depth;
+   png_ptr->row_info.pixel_depth = png_ptr->pixel_depth;
+
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+       png_ptr->row_info.width);
+   /* row_buf[0] is the filter type byte; the pixel data starts at +1. */
+   png_read_filter_row(png_ptr, &(png_ptr->row_info),
+      png_ptr->row_buf + 1, png_ptr->prev_row + 1,
+      (int)(png_ptr->row_buf[0]));
+   /* Keep a copy of the unfiltered row in prev_row before transforming. */
+   png_memcpy_check(png_ptr, png_ptr->prev_row, png_ptr->row_buf,
+      png_ptr->rowbytes + 1);
+
+   if (png_ptr->transformations || (png_ptr->flags&PNG_FLAG_STRIP_ALPHA))
+      png_do_read_transformations(png_ptr);
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* blow up interlaced rows to full size */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      if (png_ptr->pass < 6)
+/*       old interface (pre-1.0.9):
+         png_do_read_interlace(&(png_ptr->row_info),
+            png_ptr->row_buf + 1, png_ptr->pass, png_ptr->transformations);
+ */
+         png_do_read_interlace(png_ptr);
+    /* Per pass: report the row several times, then png_bytep_NULL rows. */
+    switch (png_ptr->pass)
+    {
+         case 0:
+         {
+            int i;
+            for (i = 0; i < 8 && png_ptr->pass == 0; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr); /* updates png_ptr->pass */
+            }
+            if (png_ptr->pass == 2) /* pass 1 might be empty */
+            {
+               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            if (png_ptr->pass == 4 && png_ptr->height <= 4)
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            if (png_ptr->pass == 6 && png_ptr->height <= 4)
+            {
+                png_push_have_row(png_ptr, png_bytep_NULL);
+                png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 1:
+         {
+            int i;
+            for (i = 0; i < 8 && png_ptr->pass == 1; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 2) /* skip top 4 generated rows */
+            {
+               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 2:
+         {
+            int i;
+            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 4) /* pass 3 might be empty */
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 3:
+         {
+            int i;
+            for (i = 0; i < 4 && png_ptr->pass == 3; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 4) /* skip top two generated rows */
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 4:
+         {
+            int i;
+            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 6) /* pass 5 might be empty */
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 5:
+         {
+            int i;
+            for (i = 0; i < 2 && png_ptr->pass == 5; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 6) /* skip top generated row */
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 6:
+         {
+            png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+            png_read_push_finish_row(png_ptr);
+            if (png_ptr->pass != 6)
+               break;
+            png_push_have_row(png_ptr, png_bytep_NULL);
+            png_read_push_finish_row(png_ptr);
+         }
+      }
+   }
+   else
+#endif
+   {  /* No interlace expansion requested: report the row exactly once. */
+      png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+      png_read_push_finish_row(png_ptr);
+   }
+}
+
+void /* PRIVATE */
+png_read_push_finish_row(png_structp png_ptr)
+{  /* Advance row_number; at the end of a pass set up the next one. */
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+
+   /* Height of interlace block.  This is not currently used - if you need
+    * it, uncomment it here and in png.h
+   PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+   */
+#endif
+
+   png_ptr->row_number++;
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;
+   /* Last row of the pass reached; only interlaced images have more. */
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      png_memset_check(png_ptr, png_ptr->prev_row, 0,
+         png_ptr->rowbytes + 1);
+      do
+      {
+         int pass;
+         pass = png_ptr->pass;
+         pass++;
+         if ((pass == 1 && png_ptr->width < 5) ||
+             (pass == 3 && png_ptr->width < 3) ||
+             (pass == 5 && png_ptr->width < 2))
+           pass++;
+
+         if (pass > 7) /* never store a pass number above 7 */
+            pass--;
+         png_ptr->pass = (png_byte) pass;
+         if (pass < 7)
+           {
+             png_ptr->iwidth = (png_ptr->width +
+                png_pass_inc[pass] - 1 -
+                png_pass_start[pass]) /
+                png_pass_inc[pass];
+
+             png_ptr->irowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,
+                png_ptr->iwidth) + 1;
+
+             if (png_ptr->transformations & PNG_INTERLACE)
+                break; /* num_rows is left unchanged in this case */
+
+             png_ptr->num_rows = (png_ptr->height +
+                png_pass_yinc[pass] - 1 -
+                png_pass_ystart[pass]) /
+                png_pass_yinc[pass];
+           }
+         else
+           break;
+
+      } while (png_ptr->iwidth == 0 || png_ptr->num_rows == 0); /* empty pass */
+   }
+}
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+/* Begin buffering a tEXt chunk of the given length for the push reader. */
+void /* PRIVATE */
+png_push_handle_tEXt(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length)
+{
+   png_charp buffer;
+
+   info_ptr = info_ptr; /* to quiet some compiler warnings */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Out of place tEXt");
+#ifdef PNG_MAX_MALLOC_64K
+   /* keep only the first 65535 bytes; skip_length records the remainder */
+   png_ptr->skip_length = 0;
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "tEXt chunk too large to fit in memory");
+      png_ptr->skip_length = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   buffer = (png_charp)png_malloc(png_ptr, (png_uint_32)(length + 1));
+   buffer[length] = '\0';  /* the extra byte keeps the data NUL terminated */
+   png_ptr->current_text = buffer;
+   png_ptr->current_text_ptr = buffer;
+   png_ptr->current_text_size = (png_size_t)length;
+   png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_tEXt_MODE;
+}
+
+/* Buffer tEXt data as it arrives; once the whole chunk is in, finish its
+ * CRC and pass the keyword and text to png_set_text_2().
+ */
+void /* PRIVATE */
+png_push_read_tEXt(png_structp png_ptr, png_infop info_ptr)
+{
+   png_textp entry;
+   png_charp keyword;
+   png_charp value;
+   int failed;
+
+   /* take as much chunk data as the input buffer currently offers */
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t chunk = png_ptr->current_text_left;
+
+      if (png_ptr->buffer_size < chunk)
+         chunk = png_ptr->buffer_size;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, chunk);
+      png_ptr->current_text_left -= chunk;
+      png_ptr->current_text_ptr += chunk;
+   }
+   if (png_ptr->current_text_left)
+      return;
+
+   /* wait until 4 more bytes are buffered before finishing the CRC */
+   if (png_ptr->buffer_size < 4)
+   {
+      png_push_save_buffer(png_ptr);
+      return;
+   }
+   png_push_crc_finish(png_ptr);
+
+#if defined(PNG_MAX_MALLOC_64K)
+   if (png_ptr->skip_length)
+      return;
+#endif
+
+   /* the keyword ends at the first NUL; the text begins just after it */
+   keyword = png_ptr->current_text;
+   value = keyword;
+   while (*value)
+      value++;
+   if (value < keyword + png_ptr->current_text_size)
+      value++;
+
+   entry = (png_textp)png_malloc(png_ptr, (png_uint_32)png_sizeof(png_text));
+   entry->compression = PNG_TEXT_COMPRESSION_NONE;
+   entry->key = keyword;
+#ifdef PNG_iTXt_SUPPORTED
+   entry->lang = NULL;
+   entry->lang_key = NULL;
+#endif
+   entry->text = value;
+   failed = png_set_text_2(png_ptr, info_ptr, entry, 1);
+
+   /* the chunk buffer and the temporary png_text are done with */
+   png_free(png_ptr, keyword);
+   png_free(png_ptr, entry);
+   png_ptr->current_text = NULL;
+   if (failed)
+      png_warning(png_ptr, "Insufficient memory to store text chunk.");
+}
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+/* Begin buffering a zTXt chunk of the given length for the push reader. */
+void /* PRIVATE */
+png_push_handle_zTXt(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length)
+{
+   png_charp buffer;
+
+   info_ptr = info_ptr; /* to quiet some compiler warnings */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Out of place zTXt");
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* A zTXt chunk over 64K is skipped outright - there is not enough space
+    * to store the uncompressed data (the true limit is probably near 32K).
+    */
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "zTXt chunk too large to fit in memory");
+      png_push_crc_skip(png_ptr, length);
+      return;
+   }
+#endif
+
+   buffer = (png_charp)png_malloc(png_ptr, (png_uint_32)(length + 1));
+   buffer[length] = '\0';  /* the extra byte keeps the data NUL terminated */
+   png_ptr->current_text = buffer;
+   png_ptr->current_text_ptr = buffer;
+   png_ptr->current_text_size = (png_size_t)length;
+   png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_zTXt_MODE;
+}
+
+/* Buffer zTXt data as it arrives; once the whole chunk is in, finish its
+ * CRC, inflate the text and store it with png_set_text_2().
+ */
+void /* PRIVATE */
+png_push_read_zTXt(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t text_size;
+
+      if (png_ptr->buffer_size < (png_uint_32)png_ptr->current_text_left)
+         text_size = png_ptr->buffer_size;
+      else
+         text_size = png_ptr->current_text_left;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, text_size);
+      png_ptr->current_text_left -= text_size;
+      png_ptr->current_text_ptr += text_size;
+   }
+   if (!(png_ptr->current_text_left))
+   {
+      png_textp text_ptr;
+      png_charp text;
+      png_charp key;
+      int ret;
+      png_size_t text_size, key_size;
+
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_crc_finish(png_ptr);
+
+      key = png_ptr->current_text;
+      for (text = key; *text; text++)
+         /* empty loop */ ;
+
+      /* zTXt can't have zero text: the keyword's NUL must be followed by a
+       * compression byte and compressed data, else avail_in below wraps.
+       */
+      if ((png_size_t)(text - key) + 2 >= png_ptr->current_text_size)
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+
+      text++;     /* step from the NUL onto the compression byte */
+      if (*text != PNG_TEXT_COMPRESSION_zTXt) /* check compression byte */
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+      text++;     /* first byte of the compressed data */
+
+      png_ptr->zstream.next_in = (png_bytep )text;
+      png_ptr->zstream.avail_in = (uInt)(png_ptr->current_text_size -
+         (text - key));
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+      key_size = text - key;  /* keyword, its NUL and the compression byte */
+      text_size = 0;
+      text = NULL;
+      ret = Z_STREAM_END;
+
+      while (png_ptr->zstream.avail_in)
+      {
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret != Z_OK && ret != Z_STREAM_END)
+         {
+            inflateReset(&png_ptr->zstream);
+            png_ptr->zstream.avail_in = 0;
+            png_ptr->current_text = NULL;
+            png_free(png_ptr, key);
+            png_free(png_ptr, text);
+            return;
+         }
+         if (!(png_ptr->zstream.avail_out) || ret == Z_STREAM_END)
+         {
+            if (text == NULL)  /* first output: start with the key_size bytes */
+            {
+               text = (png_charp)png_malloc(png_ptr,
+                  (png_uint_32)(png_ptr->zbuf_size - png_ptr->zstream.avail_out
+                     + key_size + 1));
+               png_memcpy(text + key_size, png_ptr->zbuf,
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+               png_memcpy(text, key, key_size);
+               text_size = key_size + png_ptr->zbuf_size -
+                  png_ptr->zstream.avail_out;
+               *(text + text_size) = '\0';
+            }
+            else               /* later output: grow the result and append */
+            {
+               png_charp tmp;
+
+               tmp = text;
+               text = (png_charp)png_malloc(png_ptr, text_size +
+                  (png_uint_32)(png_ptr->zbuf_size - png_ptr->zstream.avail_out
+                   + 1));
+               png_memcpy(text, tmp, text_size);
+               png_free(png_ptr, tmp);
+               png_memcpy(text + text_size, png_ptr->zbuf,
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+               text_size += png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               *(text + text_size) = '\0';
+            }
+            if (ret != Z_STREAM_END)
+            {
+               png_ptr->zstream.next_out = png_ptr->zbuf;
+               png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            }
+         }
+         else
+            break;  /* Z_OK with output space left: give up on this stream */
+
+         if (ret == Z_STREAM_END)
+            break;
+      }
+
+      inflateReset(&png_ptr->zstream);
+      png_ptr->zstream.avail_in = 0;
+
+      if (ret != Z_STREAM_END)
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         png_free(png_ptr, text);
+         return;
+      }
+
+      png_ptr->current_text = NULL;
+      png_free(png_ptr, key);
+      key = text;
+      text += key_size;
+
+      text_ptr = (png_textp)png_malloc(png_ptr,
+          (png_uint_32)png_sizeof(png_text));
+      text_ptr->compression = PNG_TEXT_COMPRESSION_zTXt;
+      text_ptr->key = key;
+#ifdef PNG_iTXt_SUPPORTED
+      text_ptr->lang = NULL;
+      text_ptr->lang_key = NULL;
+#endif
+      text_ptr->text = text;
+
+      ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+      png_free(png_ptr, key);
+      png_free(png_ptr, text_ptr);
+
+      if (ret)
+        png_warning(png_ptr, "Insufficient memory to store text chunk.");
+   }
+}
+#endif
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+/* Begin buffering an iTXt chunk of the given length for the push reader. */
+void /* PRIVATE */
+png_push_handle_iTXt(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length)
+{
+   png_charp buffer;
+
+   info_ptr = info_ptr; /* to quiet some compiler warnings */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Out of place iTXt");
+#ifdef PNG_MAX_MALLOC_64K
+   /* keep only the first 65535 bytes; skip_length records the remainder */
+   png_ptr->skip_length = 0;
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "iTXt chunk too large to fit in memory");
+      png_ptr->skip_length = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   buffer = (png_charp)png_malloc(png_ptr, (png_uint_32)(length + 1));
+   buffer[length] = '\0';  /* the extra byte keeps the data NUL terminated */
+   png_ptr->current_text = buffer;
+   png_ptr->current_text_ptr = buffer;
+   png_ptr->current_text_size = (png_size_t)length;
+   png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_iTXt_MODE;
+}
+
+/* Buffer iTXt data as it arrives; once the whole chunk is in, finish its
+ * CRC, split it into fields and store it with png_set_text_2().
+ */
+void /* PRIVATE */
+png_push_read_iTXt(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t text_size = png_ptr->current_text_left;
+
+      if (png_ptr->buffer_size < text_size)
+         text_size = png_ptr->buffer_size;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, text_size);
+      png_ptr->current_text_left -= text_size;
+      png_ptr->current_text_ptr += text_size;
+   }
+   if (!(png_ptr->current_text_left))
+   {
+      png_textp text_ptr;
+      png_charp key, end;
+      int comp_flag;
+      png_charp lang;
+      png_charp lang_key;
+      png_charp text;
+      int ret;
+
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_crc_finish(png_ptr);
+
+#if defined(PNG_MAX_MALLOC_64K)
+      if (png_ptr->skip_length)
+         return;
+#endif
+
+      /* end is the NUL png_push_handle_iTXt() stored after the chunk data */
+      key = png_ptr->current_text;
+      end = key + png_ptr->current_text_size;
+
+      for (lang = key; *lang; lang++) /* empty loop */ ;
+      /* The keyword's NUL, the compression flag and method and one more
+       * byte are required; without them the scans would leave the buffer.
+       */
+      if (end - lang < 4)
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+      lang++;     /* skip NUL separator */
+      comp_flag = *lang++;
+      lang++;     /* skip comp_type, always zero */
+
+      for (lang_key = lang; *lang_key; lang_key++) /* empty loop */ ;
+      if (lang_key < end)
+         lang_key++;        /* skip NUL separator */
+
+      for (text = lang_key; *text; text++) /* empty loop */ ;
+      if (text < end)
+         text++;
+
+      text_ptr = (png_textp)png_malloc(png_ptr,
+         (png_uint_32)png_sizeof(png_text));
+      text_ptr->compression = comp_flag + 2;
+      text_ptr->key = key;
+      text_ptr->lang = lang;
+      text_ptr->lang_key = lang_key;
+      text_ptr->text = text;
+      text_ptr->text_length = 0;
+      text_ptr->itxt_length = png_strlen(text);
+
+      ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+      /* release the chunk buffer, as the tEXt and zTXt readers do */
+      png_free(png_ptr, key);
+      png_free(png_ptr, text_ptr);
+      png_ptr->current_text = NULL;
+      if (ret)
+        png_warning(png_ptr, "Insufficient memory to store iTXt chunk.");
+   }
+}
+#endif
+
+/* This function is called when we haven't found a handler for this chunk.
+ * Unless it is an unhandled critical chunk (an error), the chunk is stored
+ * as an unknown chunk, passed to the user's chunk callback, or skipped.
+ */
+void /* PRIVATE */
+png_push_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32
+   length)
+{
+   png_uint_32 skip=0;  /* byte count handed to png_push_crc_skip() below */
+   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+   /* bit 0x20 clear in the first name byte: treated as a critical chunk */
+   if (!(png_ptr->chunk_name[0] & 0x20))
+   {
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+     if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+          PNG_HANDLE_CHUNK_ALWAYS
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+          && png_ptr->read_user_chunk_fn == NULL
+#endif
+        )
+#endif
+        png_chunk_error(png_ptr, "unknown critical chunk");
+
+     info_ptr = info_ptr; /* to quiet some compiler warnings */
+   }
+   /* keep the data when PNG_FLAG_KEEP_UNKNOWN_CHUNKS is set, else skip it */
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS)
+   {
+#ifdef PNG_MAX_MALLOC_64K
+      if (length > (png_uint_32)65535L)
+      {
+          png_warning(png_ptr, "unknown chunk too large to fit in memory");
+          skip = length - (png_uint_32)65535L;
+          length = (png_uint_32)65535L;
+      }
+#endif
+      png_memcpy((png_charp)png_ptr->unknown_chunk.name,
+                 (png_charp)png_ptr->chunk_name, 
+                 png_sizeof(png_ptr->unknown_chunk.name));
+      png_ptr->unknown_chunk.name[png_sizeof(png_ptr->unknown_chunk.name)-1]='\0';
+      /* read the chunk data (if any) into a freshly allocated buffer */
+      png_ptr->unknown_chunk.size = (png_size_t)length;
+      if (length == 0)
+         png_ptr->unknown_chunk.data = NULL;
+      else
+      {
+         png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
+         png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
+      }
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+      if(png_ptr->read_user_chunk_fn != NULL)
+      {
+         /* callback to user unknown chunk handler */
+         int ret;
+         ret = (*(png_ptr->read_user_chunk_fn))
+           (png_ptr, &png_ptr->unknown_chunk);
+         if (ret < 0)  /* a negative return from the callback is an error */
+            png_chunk_error(png_ptr, "error in user chunk");
+         if (ret == 0) /* re-check critical chunks, then store the chunk */
+         {
+            if (!(png_ptr->chunk_name[0] & 0x20))
+               if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+                    PNG_HANDLE_CHUNK_ALWAYS)
+                  png_chunk_error(png_ptr, "unknown critical chunk");
+            png_set_unknown_chunks(png_ptr, info_ptr,
+               &png_ptr->unknown_chunk, 1);
+         }
+      }
+      else
+#endif
+        png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1);
+      png_free(png_ptr, png_ptr->unknown_chunk.data);
+      png_ptr->unknown_chunk.data = NULL;
+   }
+   else
+#endif
+      skip=length;
+   png_push_crc_skip(png_ptr, skip);
+}
+
+void /* PRIVATE */
+png_push_have_info(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->info_fn == NULL) return;  /* no info callback registered */
+   (*png_ptr->info_fn)(png_ptr, info_ptr); /* pass both structs to it */
+}
+
+void /* PRIVATE */
+png_push_have_end(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->end_fn == NULL) return;  /* no end callback registered */
+   (*png_ptr->end_fn)(png_ptr, info_ptr); /* pass both structs to it */
+}
+
+void /* PRIVATE */
+png_push_have_row(png_structp png_ptr, png_bytep row)
+{
+   if (png_ptr->row_fn == NULL) return;  /* no row callback registered */
+   /* hand over the row together with its number and the current pass */
+   (*png_ptr->row_fn)(png_ptr, row, png_ptr->row_number, (int)png_ptr->pass);
+}
+
+/* Apply png_combine_row() to old_row; new_row must == png_ptr->row_buf. */
+void PNGAPI
+png_progressive_combine_row(png_structp png_ptr, png_bytep old_row,
+   png_bytep new_row)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST int FARDATA png_pass_dsp_mask[7] =
+      {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
+#endif
+   if (png_ptr == NULL || new_row == NULL) return;  /* nothing to combine */
+   png_combine_row(png_ptr, old_row, png_pass_dsp_mask[png_ptr->pass]);
+}
+
+/* Store the info/row/end callbacks and install png_push_fill_buffer. */
+void PNGAPI
+png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr,
+   png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
+   png_progressive_end_ptr end_fn)
+{
+   if (!png_ptr) return;
+   png_ptr->end_fn = end_fn;
+   png_ptr->row_fn = row_fn;
+   png_ptr->info_fn = info_fn;
+   png_set_read_fn(png_ptr, progressive_ptr, png_push_fill_buffer);
+}
+
+/* Return the io_ptr stored in png_ptr, or NULL when png_ptr is NULL. */
+png_voidp PNGAPI
+png_get_progressive_ptr(png_structp png_ptr)
+{
+   return (png_ptr != NULL) ? png_ptr->io_ptr : NULL;
+}
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
diff --git a/PNG/pngread.c b/PNG/pngread.c
new file mode 100644
index 0000000..bd8bcd9
--- /dev/null
+++ b/PNG/pngread.c
@@ -0,0 +1,1479 @@
+
+/* pngread.c - read a PNG file
+ *
+ * Last changed in libpng 1.2.25 [February 18, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains routines that an application calls directly to
+ * read a PNG file or stream.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Create a PNG structure for reading, and allocate any memory needed. */
+png_structp PNGAPI
+png_create_read_struct(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_read_struct_2(user_png_ver, error_ptr, error_fn,
+      warn_fn, png_voidp_NULL, png_malloc_ptr_NULL, png_free_ptr_NULL));
+}
+
+/* As png_create_read_struct(), but with caller-supplied memory functions. */
+png_structp PNGAPI
+png_create_read_struct_2(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   /* Returns NULL if allocation fails or a png_error() occurs during setup. */
+   png_structp png_ptr;
+
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   jmp_buf jmpbuf;
+#endif
+#endif
+
+   int i;
+
+   png_debug(1, "in png_create_read_struct\n");
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
+      (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr);
+#else
+   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+#endif
+   if (png_ptr == NULL)
+      return (NULL);
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+   /* a png_error() raised during the setup below returns to this setjmp */
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_ptr->jmpbuf))
+#endif
+   {
+      png_free(png_ptr, png_ptr->zbuf);
+      png_ptr->zbuf=NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)png_ptr,
+         (png_free_ptr)free_fn, (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)png_ptr);
+#endif
+      return (NULL);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#endif
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
+#endif
+
+   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
+
+   /* Flag any difference between the version strings, stopping at the first
+    * one so that a shorter user_png_ver is never read past its terminator.
+    */
+   if(user_png_ver)
+   {
+     i=0;
+     while (user_png_ver[i] == png_libpng_ver[i] && png_libpng_ver[i])
+        i++;
+     if(user_png_ver[i] != png_libpng_ver[i])
+        png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+   }
+   else
+        png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+
+   if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH)
+   {
+     /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so
+      * older applications must be recompiled.  From libpng 1.0 on only the
+      * leading digits are compared; a string too short to hold them fails.
+      */
+     if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] ||
+         user_png_ver[1] == '\0' ||
+         (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) ||
+         (user_png_ver[0] == '0' && user_png_ver[2] < '9'))
+     {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+        char msg[80];
+        if (user_png_ver)
+        {
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
+             user_png_ver);
+          png_warning(png_ptr, msg);
+        }
+        png_snprintf(msg, 80,
+             "Application  is  running with png.c from libpng-%.20s",
+           png_libpng_ver);
+        png_warning(png_ptr, msg);
+#endif
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+        png_ptr->flags=0;
+#endif
+        png_error(png_ptr,
+           "Incompatible libpng version in application and library");
+     }
+   }
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+     (png_uint_32)png_ptr->zbuf_size);
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+
+   switch (inflateInit(&png_ptr->zstream))
+   {
+     case Z_OK: /* Do nothing */ break;
+     case Z_MEM_ERROR:
+     case Z_STREAM_ERROR: png_error(png_ptr, "zlib memory error"); break;
+     case Z_VERSION_ERROR: png_error(png_ptr, "zlib version error"); break;
+     default: png_error(png_ptr, "Unknown zlib error");
+   }
+
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+   png_set_read_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL);
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* Applications that neglect to set up their own setjmp() and then encounter
+   a png_error() will longjmp here.  Since the jmpbuf is then meaningless we
+   abort instead of returning. */
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+      PNG_ABORT();
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#else
+   if (setjmp(png_ptr->jmpbuf))
+      PNG_ABORT();
+#endif
+#endif
+   return (png_ptr);
+}
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Initialize PNG structure for reading, and allocate any memory needed.
+   This interface is deprecated in favour of the png_create_read_struct(),
+   and it will disappear as of libpng-1.3.0. */
+#undef png_read_init
+/* Deprecated entry point, reached only via pre-1.0.7-compiled applications. */
+void PNGAPI
+png_read_init(png_structp png_ptr)
+{
+   png_read_init_2(png_ptr, "1.0.6 or earlier", 0, 0);
+}
+
+void PNGAPI
+png_read_init_2(png_structp png_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size, png_size_t png_info_size)
+{
+   int struct_too_small = (png_struct_size < png_sizeof(png_struct));
+   int info_too_small = (png_info_size < png_sizeof(png_info));
+   if (png_ptr == NULL) return; /* only pre-1.0.12-compiled apps come here */
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+   if (struct_too_small || info_too_small)
+   {
+      char msg[80];
+      png_ptr->warning_fn = NULL;
+      if (user_png_ver != NULL)
+      {
+         png_snprintf(msg, 80,
+            "Application was compiled with png.h from libpng-%.20s",
+            user_png_ver);
+         png_warning(png_ptr, msg);
+      }
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
+         png_libpng_ver);
+      png_warning(png_ptr, msg);
+   }
+#endif
+   if (struct_too_small)
+   {
+      png_ptr->error_fn = NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+      png_ptr->flags = 0;
+#endif
+      png_error(png_ptr,
+       "The png struct allocated by the application for reading is too small.");
+   }
+   if (info_too_small)
+   {
+      png_ptr->error_fn = NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+      png_ptr->flags = 0;
+#endif
+      png_error(png_ptr,
+         "The info struct allocated by application for reading is too small.");
+   }
+   png_read_init_3(&png_ptr, user_png_ver, png_struct_size);
+}
+#endif /* PNG_1_0_X || PNG_1_2_X */
+
+/* Reset *ptr_ptr for reading (deprecated png_read_init() path), replacing
+ * the struct first if the caller's copy is smaller than this library's. */
+void PNGAPI
+png_read_init_3(png_structpp ptr_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp;  /* to save current jump buffer */
+#endif
+   int i=0;
+   png_structp png_ptr=*ptr_ptr;
+
+   if(png_ptr == NULL) return;
+   /* A missing user_png_ver counts as a mismatch.  The scan stops at the
+    * first difference so a short string is not read past its terminator. */
+   if (user_png_ver != NULL)
+      while (user_png_ver[i] == png_libpng_ver[i] && png_libpng_ver[i])
+         i++;
+   if (user_png_ver == NULL || user_png_ver[i] != png_libpng_ver[i])
+   {
+#ifdef PNG_LEGACY_SUPPORTED
+     png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+#else
+     png_ptr->warning_fn=NULL;
+     png_warning(png_ptr,
+      "Application uses deprecated png_read_init() and should be recompiled.");
+#endif
+   }
+
+   png_debug(1, "in png_read_init_3\n");
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* save jump buffer and error functions */
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   if(png_sizeof(png_struct) > png_struct_size)
+     {
+       png_destroy_struct(png_ptr);
+       *ptr_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+       png_ptr = *ptr_ptr;
+       if(png_ptr == NULL) return;  /* no replacement could be allocated */
+     }
+
+   /* reset all variables to 0 */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* restore jump buffer */
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+     (png_uint_32)png_ptr->zbuf_size);
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+
+   switch (inflateInit(&png_ptr->zstream))
+   {
+     case Z_OK: /* Do nothing */ break;
+     case Z_MEM_ERROR:
+     case Z_STREAM_ERROR: png_error(png_ptr, "zlib memory"); break;
+     case Z_VERSION_ERROR: png_error(png_ptr, "zlib version"); break;
+     default: png_error(png_ptr, "Unknown zlib error");
+   }
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+   png_set_read_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL);
+}
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the information before the actual image data.  This has been
+ * changed in v0.90 to allow reading a file that already has the magic
+ * bytes read from the stream.  You can tell libpng how many bytes have
+ * been read from the beginning of the stream (up to the maximum of 8)
+ * via png_set_sig_bytes(), and we will only check the remaining bytes
+ * here.  The application can then have access to the signature bytes we
+ * read if it is determined that this isn't a valid PNG file.
+ */
+void PNGAPI
+png_read_info(png_structp png_ptr, png_infop info_ptr)
+{
+   if(png_ptr == NULL || info_ptr == NULL) return;
+   png_debug(1, "in png_read_info\n");
+   /* If we haven't checked all of the PNG signature bytes, do so now. */
+   if (png_ptr->sig_bytes < 8)
+   {
+      png_size_t num_checked = png_ptr->sig_bytes,
+                 num_to_check = 8 - num_checked;
+
+      png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check);
+      png_ptr->sig_bytes = 8;
+
+      if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check))
+      {
+         if (num_checked < 4 &&
+             png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4))
+            png_error(png_ptr, "Not a PNG file");
+         else
+            png_error(png_ptr, "PNG file corrupted by ASCII conversion");
+      }
+      if (num_checked < 3)
+         png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+   }
+
+   for(;;)
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+      png_byte chunk_length[4];
+      png_uint_32 length;
+
+      png_read_data(png_ptr, chunk_length, 4);
+      length = png_get_uint_31(png_ptr,chunk_length);
+
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+
+      png_debug2(0, "Reading %s chunk, length=%lu.\n", png_ptr->chunk_name,
+         length);
+
+      /* This should be a binary subdivision search or a hash for
+       * matching the chunk name rather than a linear search.
+       */
+      if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+        if(png_ptr->mode & PNG_AFTER_IDAT)
+          png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+      if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+         png_handle_IHDR(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+         png_handle_IEND(png_ptr, info_ptr, length);
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+      {
+         if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+            png_ptr->mode |= PNG_HAVE_IDAT;
+         png_handle_unknown(png_ptr, info_ptr, length);
+         if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+            png_ptr->mode |= PNG_HAVE_PLTE;
+         else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         {
+            if (!(png_ptr->mode & PNG_HAVE_IHDR))
+               png_error(png_ptr, "Missing IHDR before IDAT");
+            else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                     !(png_ptr->mode & PNG_HAVE_PLTE))
+               png_error(png_ptr, "Missing PLTE before IDAT");
+            break;
+         }
+      }
+#endif
+      else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_handle_PLTE(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         if (!(png_ptr->mode & PNG_HAVE_IHDR))
+            png_error(png_ptr, "Missing IHDR before IDAT");
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                  !(png_ptr->mode & PNG_HAVE_PLTE))
+            png_error(png_ptr, "Missing PLTE before IDAT");
+
+         png_ptr->idat_size = length;
+         png_ptr->mode |= PNG_HAVE_IDAT;
+         break;
+      }
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+      else
+         png_handle_unknown(png_ptr, info_ptr, length);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+/* Optional call: one-time row setup, then update the user's info_ptr */
+void PNGAPI
+png_read_update_info(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_read_update_info\n");
+   if (png_ptr == NULL) return;
+   if (png_ptr->flags & PNG_FLAG_ROW_INIT) /* rows were already set up */
+      png_warning(png_ptr,
+      "Ignoring extra png_read_update_info() call; row buffer not reallocated");
+   else
+      png_read_start_row(png_ptr);
+   png_read_transform_info(png_ptr, info_ptr);
+}
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Set up the palette, background, etc. once the transformations
+ * have been chosen but before any row is read, so the caller can,
+ * for example, fetch a gamma-corrected palette.  Calling this is
+ * optional: png_read_row() performs the same setup when needed.
+ */
+void PNGAPI
+png_start_read_image(png_structp png_ptr)
+{
+   png_debug(1, "in png_start_read_image\n");
+   if (png_ptr == NULL || (png_ptr->flags & PNG_FLAG_ROW_INIT))
+      return;
+   png_read_start_row(png_ptr);
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+void PNGAPI
+png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
+{ /* Decode the next row into row and/or dsp_row; either may be NULL. */
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST PNG_IDAT;
+   PNG_CONST int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
+     0xff};
+   PNG_CONST int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+#endif
+   int ret; /* result of the inflate() call */
+   if(png_ptr == NULL) return;
+   png_debug2(1, "in png_read_row (row %lu, pass %d)\n",
+      png_ptr->row_number, png_ptr->pass);
+   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
+      png_read_start_row(png_ptr);
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+   /* first row only: warn about requested transforms that were compiled out */
+#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && !defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined.");
+#endif
+   }
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* if interlaced and we do not need a new row, combine row and return */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      switch (png_ptr->pass)
+      {
+         case 0:
+            if (png_ptr->row_number & 0x07)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 1:
+            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 2:
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 4))
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 3:
+            if ((png_ptr->row_number & 3) || png_ptr->width < 3)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 4:
+            if ((png_ptr->row_number & 3) != 2)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 2))
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 5:
+            if ((png_ptr->row_number & 1) || png_ptr->width < 2)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 6:
+            if (!(png_ptr->row_number & 1))
+            {
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+      }
+   }
+#endif
+
+   if (!(png_ptr->mode & PNG_HAVE_IDAT))
+      png_error(png_ptr, "Invalid attempt to read row data");
+   /* inflate until one row (irowbytes bytes) has landed in row_buf */
+   png_ptr->zstream.next_out = png_ptr->row_buf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes;
+   do
+   {
+      if (!(png_ptr->zstream.avail_in)) /* input used up: refill zbuf */
+      {
+         while (!png_ptr->idat_size) /* move on to the next IDAT */
+         {
+            png_byte chunk_length[4];
+
+            png_crc_finish(png_ptr, 0);
+
+            png_read_data(png_ptr, chunk_length, 4);
+            png_ptr->idat_size = png_get_uint_31(png_ptr,chunk_length);
+
+            png_reset_crc(png_ptr);
+            png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+            if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+               png_error(png_ptr, "Not enough image data");
+         }
+         png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
+         png_ptr->zstream.next_in = png_ptr->zbuf;
+         if (png_ptr->zbuf_size > png_ptr->idat_size)
+            png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
+         png_crc_read(png_ptr, png_ptr->zbuf,
+            (png_size_t)png_ptr->zstream.avail_in);
+         png_ptr->idat_size -= png_ptr->zstream.avail_in;
+      }
+      ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+      if (ret == Z_STREAM_END)
+      {
+         if (png_ptr->zstream.avail_out || png_ptr->zstream.avail_in ||
+            png_ptr->idat_size)
+            png_error(png_ptr, "Extra compressed data");
+         png_ptr->mode |= PNG_AFTER_IDAT;
+         png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+         break;
+      }
+      if (ret != Z_OK)
+         png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
+                   "Decompression error");
+
+   } while (png_ptr->zstream.avail_out);
+   /* fill in row_info for the row that was just decoded */
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->iwidth;
+   png_ptr->row_info.channels = png_ptr->channels;
+   png_ptr->row_info.bit_depth = png_ptr->bit_depth;
+   png_ptr->row_info.pixel_depth = png_ptr->pixel_depth;
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+       png_ptr->row_info.width);
+   /* row_buf[0] is handed on as the filter type; zero skips unfiltering */
+   if(png_ptr->row_buf[0])
+   png_read_filter_row(png_ptr, &(png_ptr->row_info),
+      png_ptr->row_buf + 1, png_ptr->prev_row + 1,
+      (int)(png_ptr->row_buf[0]));
+   /* remember this row: prev_row is an input to the next filter call */
+   png_memcpy_check(png_ptr, png_ptr->prev_row, png_ptr->row_buf,
+      png_ptr->rowbytes + 1);
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_read_intrapixel(&(png_ptr->row_info), png_ptr->row_buf + 1);
+   }
+#endif
+
+
+   if (png_ptr->transformations || (png_ptr->flags&PNG_FLAG_STRIP_ALPHA))
+      png_do_read_transformations(png_ptr);
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* blow up interlaced rows to full size */
+   if (png_ptr->interlaced &&
+      (png_ptr->transformations & PNG_INTERLACE))
+   {
+      if (png_ptr->pass < 6)
+/*       old interface (pre-1.0.9):
+         png_do_read_interlace(&(png_ptr->row_info),
+            png_ptr->row_buf + 1, png_ptr->pass, png_ptr->transformations);
+ */
+         png_do_read_interlace(png_ptr);
+
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row,
+            png_pass_dsp_mask[png_ptr->pass]);
+      if (row != NULL)
+         png_combine_row(png_ptr, row,
+            png_pass_mask[png_ptr->pass]);
+   }
+   else
+#endif
+   {
+      if (row != NULL)
+         png_combine_row(png_ptr, row, 0xff);
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row, 0xff);
+   }
+   png_read_finish_row(png_ptr);
+
+   if (png_ptr->read_row_fn != NULL)
+      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read one or more rows of image data.  If the image is interlaced,
+ * and png_set_interlace_handling() has been called, the rows need to
+ * contain the contents of the rows from the previous pass.  If the
+ * image has alpha or transparency, and png_handle_alpha()[*] has been
+ * called, the rows contents must be initialized to the contents of the
+ * screen.
+ *
+ * "row" holds the actual image, and pixels are placed in it
+ * as they arrive.  If the image is displayed after each pass, it will
+ * appear to "sparkle" in.  "display_row" can be used to display a
+ * "chunky" progressive image, with finer detail added as it becomes
+ * available.  If you do not want this "chunky" display, you may pass
+ * NULL for display_row.  If you do not want the sparkle display, and
+ * you have not called png_handle_alpha(), you may pass NULL for rows.
+ * If you have called png_handle_alpha(), and the image has either an
+ * alpha channel or a transparency chunk, you must provide a buffer for
+ * rows.  In this case, you do not have to provide a display_row buffer
+ * also, but you may.  If the image is not interlaced, or if you have
+ * not called png_set_interlace_handling(), the display_row buffer will
+ * be ignored, so pass NULL to it.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+
+void PNGAPI
+png_read_rows(png_structp png_ptr, png_bytepp row,
+   png_bytepp display_row, png_uint_32 num_rows)
+{
+   png_uint_32 remaining;
+
+   png_debug(1, "in png_read_rows\n");
+   if (png_ptr == NULL)
+      return;
+
+   /* With neither pointer array supplied, no row is read at all. */
+   if (row == NULL && display_row == NULL)
+      return;
+
+   /* Feed png_read_row() one row per iteration.  Each array that was
+    * supplied is stepped to its next entry; an array that was not
+    * supplied contributes png_bytep_NULL on every call, so that kind
+    * of output is simply not produced.
+    */
+   for (remaining = num_rows; remaining > 0; remaining--)
+   {
+      png_bytep image_row = png_bytep_NULL;
+      png_bytep chunky_row = png_bytep_NULL;
+
+      /* "sparkle" output: the actual image row */
+      if (row != NULL)
+         image_row = *row++;
+
+      /* "chunky" progressive output */
+      if (display_row != NULL)
+         chunky_row = *display_row++;
+
+      png_read_row(png_ptr, image_row, chunky_row);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the entire image.  If the image has an alpha channel or a tRNS
+ * chunk, and you have called png_handle_alpha()[*], you will need to
+ * initialize the image to the current image that PNG will be overlaying.
+ * We set the num_rows again here, in case it was incorrectly set in
+ * png_read_start_row() by a call to png_read_update_info() or
+ * png_start_read_image() if png_set_interlace_handling() wasn't called
+ * prior to either of these functions like it should have been.  You can
+ * only call this function once.  If you desire to have an image for
+ * each pass of an interlaced image, use png_read_rows() instead.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+void PNGAPI
+png_read_image(png_structp png_ptr, png_bytepp image)
+{
+   png_uint_32 y;
+   png_uint_32 rows_in_image;
+   int num_passes, cur_pass;
+
+   png_debug(1, "in png_read_image\n");
+   if (png_ptr == NULL)
+      return;
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   /* the interlace setup call reports how many passes are needed */
+   num_passes = png_set_interlace_handling(png_ptr);
+#else
+   /* with the interlace handler compiled out, one pass is all we can do */
+   if (png_ptr->interlaced)
+      png_error(png_ptr,
+        "Cannot read interlaced image -- interlace handler disabled.");
+   num_passes = 1;
+#endif
+
+   /* force num_rows to the full image height before reading */
+   rows_in_image = png_ptr->height;
+   png_ptr->num_rows = rows_in_image;
+
+   /* every pass starts again from the first entry of image[] */
+   for (cur_pass = 0; cur_pass < num_passes; cur_pass++)
+   {
+      for (y = 0; y < rows_in_image; y++)
+         png_read_row(png_ptr, image[y], png_bytep_NULL);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the chunks after the image data, stopping once IEND is seen.
+ * Will not read past the end of the file, will verify the end is accurate,
+ * and will read any trailing comments or time data if info_ptr is not NULL.
+ */
+void PNGAPI
+png_read_end(png_structp png_ptr, png_infop info_ptr)
+{
+   png_byte chunk_length[4]; /* raw length field of the current chunk */
+   png_uint_32 length;       /* chunk_length after png_get_uint_31() */
+
+   png_debug(1, "in png_read_end\n");
+   if(png_ptr == NULL) return;
+   png_crc_finish(png_ptr, 0); /* Finish off CRC from last IDAT chunk */
+   /* read chunk after chunk until png_ptr->mode has PNG_HAVE_IEND set */
+   do
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+      /* chunk header: length first, then the CRC-covered chunk name */
+      png_read_data(png_ptr, chunk_length, 4);
+      length = png_get_uint_31(png_ptr,chunk_length);
+
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+
+      png_debug1(0, "Reading %s chunk.\n", png_ptr->chunk_name);
+      /* hand the chunk to the handler matching its name */
+      if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+         png_handle_IHDR(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+         png_handle_IEND(png_ptr, info_ptr, length);
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+      {
+         if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         {
+            if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+               png_error(png_ptr, "Too many IDAT's found");
+         }
+         png_handle_unknown(png_ptr, info_ptr, length);
+         if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+            png_ptr->mode |= PNG_HAVE_PLTE;
+      }
+#endif
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         /* Zero length IDATs are legal after the last IDAT has been
+          * read, but not after other chunks have been read.
+          */
+         if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+            png_error(png_ptr, "Too many IDAT's found");
+         png_crc_finish(png_ptr, length);
+      }
+      else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_handle_PLTE(png_ptr, info_ptr, length);
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+      else
+         png_handle_unknown(png_ptr, info_ptr, length);
+   } while (!(png_ptr->mode & PNG_HAVE_IEND));
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+/* Free the read struct and both info structs; NULL the caller's pointers */
+void PNGAPI
+png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr,
+   png_infopp end_info_ptr_ptr)
+{
+   png_structp png_ptr = NULL;
+   png_infop info_ptr = NULL, end_info_ptr = NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn = NULL;
+   png_voidp mem_ptr = NULL;
+#endif
+
+   png_debug(1, "in png_destroy_read_struct\n");
+   if (png_ptr_ptr != NULL)
+   {
+      png_ptr = *png_ptr_ptr;
+   }
+   if (png_ptr == NULL)
+      return;
+   /* grab these first: png_read_destroy() below memsets png_ptr to zero */
+#ifdef PNG_USER_MEM_SUPPORTED
+   free_fn = png_ptr->free_fn;
+   mem_ptr = png_ptr->mem_ptr;
+#endif
+
+   if (info_ptr_ptr != NULL)
+      info_ptr = *info_ptr_ptr;
+
+   if (end_info_ptr_ptr != NULL)
+      end_info_ptr = *end_info_ptr_ptr;
+
+   png_read_destroy(png_ptr, info_ptr, end_info_ptr);
+
+   if (info_ptr != NULL)
+   {
+#if defined(PNG_TEXT_SUPPORTED)
+      png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, -1);
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn,
+          (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)info_ptr);
+#endif
+      *info_ptr_ptr = NULL;
+   }
+
+   if (end_info_ptr != NULL)
+   {
+#if defined(PNG_READ_TEXT_SUPPORTED)
+      png_free_data(png_ptr, end_info_ptr, PNG_FREE_TEXT, -1);
+#endif
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)end_info_ptr, (png_free_ptr)free_fn,
+         (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)end_info_ptr);
+#endif
+      *end_info_ptr_ptr = NULL;
+   }
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn,
+       (png_voidp)mem_ptr);
+#else
+   png_destroy_struct((png_voidp)png_ptr);
+#endif
+   *png_ptr_ptr = NULL;
+}
+
+/* Free what png_ptr owns (old method); the struct is zeroed, not freed */
+void /* PRIVATE */
+png_read_destroy(png_structp png_ptr, png_infop info_ptr, png_infop end_info_ptr)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp;
+#endif
+   png_error_ptr error_fn;
+   png_error_ptr warning_fn;
+   png_voidp error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn;
+#endif
+
+   png_debug(1, "in png_read_destroy\n");
+   if (info_ptr != NULL)
+      png_info_destroy(png_ptr, info_ptr);
+
+   if (end_info_ptr != NULL)
+      png_info_destroy(png_ptr, end_info_ptr);
+
+   png_free(png_ptr, png_ptr->zbuf);
+   png_free(png_ptr, png_ptr->big_row_buf);
+   png_free(png_ptr, png_ptr->prev_row);
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   png_free(png_ptr, png_ptr->palette_lookup);
+   png_free(png_ptr, png_ptr->dither_index);
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   png_free(png_ptr, png_ptr->gamma_table);
+#endif
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_free(png_ptr, png_ptr->gamma_from_1);
+   png_free(png_ptr, png_ptr->gamma_to_1);
+#endif
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_PLTE)
+      png_zfree(png_ptr, png_ptr->palette);
+   png_ptr->free_me &= ~PNG_FREE_PLTE;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_PLTE)
+      png_zfree(png_ptr, png_ptr->palette);
+   png_ptr->flags &= ~PNG_FLAG_FREE_PLTE;
+#endif
+#if defined(PNG_tRNS_SUPPORTED) || \
+    defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_TRNS)
+      png_free(png_ptr, png_ptr->trans);
+   png_ptr->free_me &= ~PNG_FREE_TRNS;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_TRNS)
+      png_free(png_ptr, png_ptr->trans);
+   png_ptr->flags &= ~PNG_FLAG_FREE_TRNS;
+#endif
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_HIST)
+      png_free(png_ptr, png_ptr->hist);
+   png_ptr->free_me &= ~PNG_FREE_HIST;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_HIST)
+      png_free(png_ptr, png_ptr->hist);
+   png_ptr->flags &= ~PNG_FLAG_FREE_HIST;
+#endif
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->gamma_16_table != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_table[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_table);
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->gamma_16_from_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_from_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_from_1);
+   }
+   if (png_ptr->gamma_16_to_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_to_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_to_1);
+   }
+#endif
+#endif
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_free(png_ptr, png_ptr->time_buffer);
+#endif
+
+   inflateEnd(&png_ptr->zstream);
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_free(png_ptr, png_ptr->save_buffer);
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+#ifdef PNG_TEXT_SUPPORTED
+   png_free(png_ptr, png_ptr->current_text);
+#endif /* PNG_TEXT_SUPPORTED */
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+   /* Save the important info out of the png_struct, in case it is
+    * being used again.
+    */
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   error_fn = png_ptr->error_fn;
+   warning_fn = png_ptr->warning_fn;
+   error_ptr = png_ptr->error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   free_fn = png_ptr->free_fn;
+#endif
+   /* wipe the whole struct; only the fields saved above are put back */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+   png_ptr->error_fn = error_fn;
+   png_ptr->warning_fn = warning_fn;
+   png_ptr->error_ptr = error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr->free_fn = free_fn;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+}
+
+void PNGAPI
+png_set_read_status_fn(png_structp png_ptr, png_read_status_ptr read_row_fn)
+{
+   /* a NULL read_row_fn simply turns the per-row callback off */
+   if (png_ptr != NULL) png_ptr->read_row_fn = read_row_fn;
+}
+
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+void PNGAPI
+png_read_png(png_structp png_ptr, png_infop info_ptr,
+                           int transforms,
+                           voidp params)
+{ /* High-level read: whole file into info_ptr, applying the transforms mask */
+   int row;
+
+   if(png_ptr == NULL) return;
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+   /* invert the alpha channel from opacity to transparency
+    */
+   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
+       png_set_invert_alpha(png_ptr);
+#endif
+
+   /* png_read_info() gives us all of the information from the
+    * PNG file before the first IDAT (image data chunk).
+    */
+   png_read_info(png_ptr, info_ptr);
+   if (info_ptr->height > PNG_UINT_32_MAX/png_sizeof(png_bytep))
+      png_error(png_ptr,"Image is too high to process with png_read_png()");
+
+   /* -------------- image transformations start here ------------------- */
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   /* tell libpng to strip 16 bit/color files down to 8 bits per color
+    */
+   if (transforms & PNG_TRANSFORM_STRIP_16)
+       png_set_strip_16(png_ptr);
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   /* Strip alpha bytes from the input data without combining with
+    * the background (not recommended).
+    */
+   if (transforms & PNG_TRANSFORM_STRIP_ALPHA)
+       png_set_strip_alpha(png_ptr);
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) && !defined(PNG_READ_EXPAND_SUPPORTED)
+   /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single
+    * byte into separate bytes (useful for paletted and grayscale images).
+    */
+   if (transforms & PNG_TRANSFORM_PACKING)
+       png_set_packing(png_ptr);
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+   /* Change the order of packed pixels to least significant bit first
+    * (not useful if you are using png_set_packing).
+    */
+   if (transforms & PNG_TRANSFORM_PACKSWAP)
+       png_set_packswap(png_ptr);
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   /* Expand paletted colors into true RGB triplets
+    * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel
+    * Expand paletted or RGB images with transparency to full alpha
+    * channels so the data will be available as RGBA quartets.
+    */
+   if (transforms & PNG_TRANSFORM_EXPAND)
+       if ((png_ptr->bit_depth < 8) ||
+           (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ||
+           (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)))
+         png_set_expand(png_ptr);
+#endif
+
+   /* We don't handle background color or gamma transformation or dithering.
+    */
+
+#if defined(PNG_READ_INVERT_SUPPORTED)
+   /* invert monochrome files to have 0 as white and 1 as black
+    */
+   if (transforms & PNG_TRANSFORM_INVERT_MONO)
+       png_set_invert_mono(png_ptr);
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   /* If you want to shift the pixel values from the range [0,255] or
+    * [0,65535] to the original [0,7] or [0,31], or whatever range the
+    * colors were originally in:
+    */
+   if ((transforms & PNG_TRANSFORM_SHIFT)
+       && png_get_valid(png_ptr, info_ptr, PNG_INFO_sBIT))
+   {
+      png_color_8p sig_bit;
+
+      png_get_sBIT(png_ptr, info_ptr, &sig_bit);
+      png_set_shift(png_ptr, sig_bit);
+   }
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED)
+   /* flip the RGB pixels to BGR (or RGBA to BGRA)
+    */
+   if (transforms & PNG_TRANSFORM_BGR)
+       png_set_bgr(png_ptr);
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+   /* swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR)
+    */
+   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
+       png_set_swap_alpha(png_ptr);
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED)
+   /* swap bytes of 16 bit files to least significant byte first
+    */
+   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
+       png_set_swap(png_ptr);
+#endif
+
+   /* We don't handle adding filler bytes */
+
+   /* Optional call to gamma correct and add the background to the palette
+    * and update info structure.  REQUIRED if you are expecting libpng to
+    * update the palette for you (i.e., you selected such a transform above).
+    */
+   png_read_update_info(png_ptr, info_ptr);
+
+   /* -------------- image transformations end here ------------------- */
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+#endif
+   if(info_ptr->row_pointers == NULL)
+   {
+      info_ptr->row_pointers = (png_bytepp)png_malloc(png_ptr,
+         info_ptr->height * png_sizeof(png_bytep));
+      /* NULL every slot so a failed row allocation never frees garbage */
+      png_memset(info_ptr->row_pointers, 0,
+         info_ptr->height * png_sizeof(png_bytep));
+#ifdef PNG_FREE_ME_SUPPORTED
+      info_ptr->free_me |= PNG_FREE_ROWS;
+#endif
+      for (row = 0; row < (int)info_ptr->height; row++)
+         info_ptr->row_pointers[row] = (png_bytep)png_malloc(png_ptr,
+            png_get_rowbytes(png_ptr, info_ptr));
+   }
+
+   png_read_image(png_ptr, info_ptr->row_pointers);
+   info_ptr->valid |= PNG_INFO_IDAT;
+
+   /* read rest of file, and get additional chunks in info_ptr - REQUIRED */
+   png_read_end(png_ptr, info_ptr);
+
+   transforms = transforms; /* quiet compiler warnings */
+   params = params;
+}
+#endif /* PNG_INFO_IMAGE_SUPPORTED */
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/PNG/pngrio.c b/PNG/pngrio.c
new file mode 100644
index 0000000..7d2522f
--- /dev/null
+++ b/PNG/pngrio.c
@@ -0,0 +1,167 @@
+
+/* pngrio.c - functions for data input
+ *
+ * Last changed in libpng 1.2.13 November 13, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all input.  Users who need
+ * special handling are expected to write a function that has the same
+ * arguments as this and performs a similar function, but that possibly
+ * has a different input method.  Note that you shouldn't change this
+ * function, but rather write a replacement function and then make
+ * libpng use it at run time with png_set_read_fn(...).
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Read the data from whatever input you are using.  The default routine
+   reads from a file pointer.  Note that this routine sometimes gets called
+   with very small lengths, so you should implement some kind of simple
+   buffering if you are using unbuffered reads.  This should never be asked
+   to read more than 64K on a 16 bit machine. */
+void /* PRIVATE */
+png_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{  /* Hand the request to the installed reader; having none is fatal. */
+   png_debug1(4,"reading %d bytes\n", (int)length);
+   if (png_ptr->read_data_fn == NULL)
+      png_error(png_ptr, "Call to NULL read function");
+   else
+      png_ptr->read_data_fn(png_ptr, data, length);
+}
+
+#if !defined(PNG_NO_STDIO)
+/* This is the function that does the actual reading of data.  If you are
+   not reading from a standard C stream, you should create a replacement
+   read_data function and use it at run time with png_set_read_fn(), rather
+   than changing the library. */
+#ifndef USE_FAR_KEYWORD
+void PNGAPI
+png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_size_t got;   /* number of bytes actually obtained */
+
+   if (png_ptr == NULL)
+      return;
+   /* Pull exactly "length" bytes from the stream (or WinCE handle) kept
+    * in io_ptr; a failed ReadFile() is recorded as a zero-byte read. */
+#if defined(_WIN32_WCE)
+   if ( !ReadFile((HANDLE)(png_ptr->io_ptr), data, length, &got, NULL) )
+      got = 0;
+#else
+   got = (png_size_t)fread(data, (png_size_t)1, length,
+      (png_FILE_p)png_ptr->io_ptr);
+#endif
+
+   if (got != length)   /* anything but a complete read is an error */
+      png_error(png_ptr, "Read Error");
+}
+#else
+/* this is the model-independent version. Since the standard I/O library
+   can't handle far buffers in the medium and small models, we have to copy
+   the data.
+*/
+
+#define NEAR_BUF_SIZE 1024
+#define MIN(a,b) ((a) <= (b) ? (a) : (b))
+
+static void PNGAPI
+png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   int check;           /* total bytes read; compared against length below */
+   png_byte *n_data;
+   png_FILE_p io_ptr;
+
+   if(png_ptr == NULL) return;
+   /* Check if data really is near. If so, use usual code. */
+   n_data = (png_byte *)CVT_PTR_NOCHECK(data);
+   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)n_data == data)
+   {
+#if defined(_WIN32_WCE)
+      if ( !ReadFile((HANDLE)(png_ptr->io_ptr), data, length, &check, NULL) )
+         check = 0;
+#else
+      check = fread(n_data, 1, length, io_ptr);
+#endif
+   }
+   else
+   {
+      png_byte buf[NEAR_BUF_SIZE];   /* near staging buffer for the read */
+      png_size_t read, remaining, err;
+      check = 0;
+      remaining = length;
+      do
+      {  /* transfer at most one staging buffer per iteration */
+         read = MIN(NEAR_BUF_SIZE, remaining);
+#if defined(_WIN32_WCE)
+         if ( !ReadFile((HANDLE)(io_ptr), buf, read, &err, NULL) )
+            err = 0;
+#else
+         err = fread(buf, (png_size_t)1, read, io_ptr);
+#endif
+         /* copy only the bytes actually read from near buf to far data */
+         png_memcpy(data, buf, err);
+         if(err != read)
+            break;
+         check += err;
+         data += read;
+         remaining -= read;
+      }
+      while (remaining != 0);
+   }
+   if ((png_uint_32)check != (png_uint_32)length)
+      png_error(png_ptr, "read Error");
+}
+#endif
+#endif
+
+/* This function allows the application to supply a new input function
+   for libpng if standard C streams aren't being used.
+
+   This function takes as its arguments:
+   png_ptr      - pointer to a png input data structure
+   io_ptr       - pointer to user supplied structure containing info about
+                  the input functions.  May be NULL.
+   read_data_fn - pointer to a new input function that takes as its
+                  arguments a pointer to a png_struct, a pointer to
+                  a location where input data can be stored, and a 32-bit
+                  unsigned int that is the number of bytes to be read.
+                  To exit and output any fatal error messages the new read
+                  function should call png_error(png_ptr, "Error msg"). */
+void PNGAPI
+png_set_read_fn(png_structp png_ptr, png_voidp io_ptr,
+   png_rw_ptr read_data_fn)
+{
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->io_ptr = io_ptr;
+   png_ptr->read_data_fn = read_data_fn;
+#if !defined(PNG_NO_STDIO)
+   /* With stdio available, a NULL callback selects the built-in
+    * stream reader instead of leaving the struct without one. */
+   if (read_data_fn == NULL)
+      png_ptr->read_data_fn = png_default_read_data;
+#endif
+
+   /* A struct set up for reading must not keep a write callback. */
+   if (png_ptr->write_data_fn != NULL)
+   {
+      png_ptr->write_data_fn = NULL;
+      png_warning(png_ptr,
+         "It's an error to set both read_data_fn and write_data_fn in the ");
+      png_warning(png_ptr,
+         "same structure.  Resetting write_data_fn to NULL.");
+   }
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_ptr->output_flush_fn = NULL;   /* cleared together with the writer */
+#endif
+}
+#endif /* PNG_READ_SUPPORTED */
diff --git a/PNG/pngrtran.c b/PNG/pngrtran.c
new file mode 100644
index 0000000..873b22c
--- /dev/null
+++ b/PNG/pngrtran.c
@@ -0,0 +1,4292 @@
+
+/* pngrtran.c - transforms the data in a row for PNG readers
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains functions optionally called by an application
+ * in order to tell libpng how to handle data when reading a PNG.
+ * Transformations that are used in both reading and writing are
+ * in pngtrans.c.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Set the action on getting a CRC error for an ancillary or critical chunk. */
+void PNGAPI
+png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action)
+{
+   png_debug(1, "in png_set_crc_action\n");
+   if (png_ptr == NULL)
+      return;
+   /* Select the reaction to a CRC error in a critical chunk. */
+   switch (crit_action)
+   {
+      case PNG_CRC_NO_CHANGE:      /* keep the current setting */
+         break;
+      case PNG_CRC_WARN_USE:       /* warn, but use the data */
+         png_ptr->flags = (png_ptr->flags & ~PNG_FLAG_CRC_CRITICAL_MASK) |
+            PNG_FLAG_CRC_CRITICAL_USE;
+         break;
+      case PNG_CRC_QUIET_USE:      /* use the data without warning */
+         png_ptr->flags = (png_ptr->flags & ~PNG_FLAG_CRC_CRITICAL_MASK) |
+            PNG_FLAG_CRC_CRITICAL_USE | PNG_FLAG_CRC_CRITICAL_IGNORE;
+         break;
+      case PNG_CRC_WARN_DISCARD:   /* not allowed for critical chunks */
+         png_warning(png_ptr, "Can't discard critical data on CRC error.");
+         /* FALLTHROUGH: handled like the default action */
+      case PNG_CRC_ERROR_QUIT:     /* error out */
+      case PNG_CRC_DEFAULT:
+      default:
+         png_ptr->flags &= ~PNG_FLAG_CRC_CRITICAL_MASK;
+         break;
+   }
+   /* Select the reaction to a CRC error in an ancillary chunk. */
+   switch (ancil_action)
+   {
+      case PNG_CRC_NO_CHANGE:      /* keep the current setting */
+         break;
+      case PNG_CRC_WARN_USE:       /* warn, but use the data */
+         png_ptr->flags = (png_ptr->flags & ~PNG_FLAG_CRC_ANCILLARY_MASK) |
+            PNG_FLAG_CRC_ANCILLARY_USE;
+         break;
+      case PNG_CRC_QUIET_USE:      /* use the data without warning */
+         png_ptr->flags = (png_ptr->flags & ~PNG_FLAG_CRC_ANCILLARY_MASK) |
+            PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN;
+         break;
+      case PNG_CRC_ERROR_QUIT:     /* error out */
+         png_ptr->flags = (png_ptr->flags & ~PNG_FLAG_CRC_ANCILLARY_MASK) |
+            PNG_FLAG_CRC_ANCILLARY_NOWARN;
+         break;
+      case PNG_CRC_WARN_DISCARD:   /* warn and discard the data */
+      case PNG_CRC_DEFAULT:
+      default:
+         png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
+         break;
+   }
+}
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_FLOATING_POINT_SUPPORTED)
+/* handle alpha and tRNS via a background color */
+void PNGAPI
+png_set_background(png_structp png_ptr,
+   png_color_16p background_color, int background_gamma_code,
+   int need_expand, double background_gamma)
+{
+   png_debug(1, "in png_set_background\n");
+   if (png_ptr == NULL)
+      return;
+   if (background_gamma_code == PNG_BACKGROUND_GAMMA_UNKNOWN)
+   {  /* refuse to record a background whose gamma is unknown */
+      png_warning(png_ptr, "Application must supply a known background gamma");
+      return;
+   }
+   png_memcpy(&(png_ptr->background), background_color,
+      png_sizeof(png_color_16));
+   png_ptr->background_gamma_type = (png_byte)(background_gamma_code);
+   png_ptr->background_gamma = (float)background_gamma;
+   png_ptr->transformations |= PNG_BACKGROUND;
+   if (need_expand) png_ptr->transformations |= PNG_BACKGROUND_EXPAND;
+}
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* strip 16 bit depth files to 8 bit depth */
+void PNGAPI
+png_set_strip_16(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_strip_16\n");
+   if (png_ptr != NULL)   /* nothing to flag without a read struct */
+      png_ptr->transformations |= PNG_16_TO_8;
+}
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+void PNGAPI
+png_set_strip_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_strip_alpha\n");
+   if (png_ptr != NULL)   /* recorded in flags, not in transformations */
+      png_ptr->flags |= PNG_FLAG_STRIP_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* Dither file to 8 bit.  Supply a palette, the current number
+ * of elements in the palette, the maximum number of elements
+ * allowed, and a histogram if possible.  If the current number
+ * of colors is greater than the maximum number, the palette will be
+ * modified to fit in the maximum number.  "full_dither" indicates
+ * whether we need a dithering cube set up for RGB images, or if we
+ * simply are reducing the number of colors in a paletted image.
+ */
+
+typedef struct png_dsort_struct
+{
+   struct png_dsort_struct FAR * next;   /* next node in the same chain */
+   png_byte left;                        /* first palette index of the pair */
+   png_byte right;                       /* second palette index of the pair */
+} png_dsort;
+typedef png_dsort FAR *       png_dsortp;   /* pointer to a node */
+typedef png_dsort FAR * FAR * png_dsortpp;  /* pointer to a node pointer */
+
+void PNGAPI
+png_set_dither(png_structp png_ptr, png_colorp palette,
+   int num_palette, int maximum_colors, png_uint_16p histogram,
+   int full_dither)
+{  /* flags PNG_DITHER, optionally shrinks the palette, builds lookup data */
+   png_debug(1, "in png_set_dither\n");
+   if(png_ptr == NULL) return;
+   png_ptr->transformations |= PNG_DITHER;
+
+   if (!full_dither)   /* index-only reduction: start from an identity map */
+   {
+      int i;
+
+      png_ptr->dither_index = (png_bytep)png_malloc(png_ptr,
+         (png_uint_32)(num_palette * png_sizeof (png_byte)));
+      for (i = 0; i < num_palette; i++)
+         png_ptr->dither_index[i] = (png_byte)i;
+   }
+
+   if (num_palette > maximum_colors)   /* the palette has to be reduced */
+   {
+      if (histogram != NULL)
+      {
+         /* This is easy enough, just throw out the least used colors.
+            Perhaps not the best solution, but good enough. */
+
+         int i;
+
+         /* initialize an array to sort colors */
+         png_ptr->dither_sort = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+
+         /* initialize the dither_sort array */
+         for (i = 0; i < num_palette; i++)
+            png_ptr->dither_sort[i] = (png_byte)i;
+
+         /* Find the least used palette entries by starting a
+            bubble sort, and running it until we have sorted
+            out enough colors.  Note that we don't care about
+            sorting all the colors, just finding which are
+            least used. */
+
+         for (i = num_palette - 1; i >= maximum_colors; i--)
+         {
+            int done; /* to stop early if the list is pre-sorted */
+            int j;
+
+            done = 1;
+            for (j = 0; j < i; j++)
+            {
+               if (histogram[png_ptr->dither_sort[j]]
+                   < histogram[png_ptr->dither_sort[j + 1]])
+               {
+                  png_byte t;
+
+                  t = png_ptr->dither_sort[j];
+                  png_ptr->dither_sort[j] = png_ptr->dither_sort[j + 1];
+                  png_ptr->dither_sort[j + 1] = t;
+                  done = 0;
+               }
+            }
+            if (done)
+               break;
+         }
+
+         /* swap the palette around, and set up a table, if necessary */
+         if (full_dither)
+         {
+            int j = num_palette;
+
+            /* put all the useful colors within the max, but don't
+               move the others */
+            for (i = 0; i < maximum_colors; i++)
+            {
+               if ((int)png_ptr->dither_sort[i] >= maximum_colors)
+               {
+                  do
+                     j--;
+                  while ((int)png_ptr->dither_sort[j] >= maximum_colors);
+                  palette[i] = palette[j];
+               }
+            }
+         }
+         else
+         {
+            int j = num_palette;
+
+            /* move all the used colors inside the max limit, and
+               develop a translation table */
+            for (i = 0; i < maximum_colors; i++)
+            {
+               /* only move the colors we need to */
+               if ((int)png_ptr->dither_sort[i] >= maximum_colors)
+               {
+                  png_color tmp_color;
+
+                  do
+                     j--;
+                  while ((int)png_ptr->dither_sort[j] >= maximum_colors);
+
+                  tmp_color = palette[j];
+                  palette[j] = palette[i];
+                  palette[i] = tmp_color;
+                  /* indicate where the color went */
+                  png_ptr->dither_index[j] = (png_byte)i;
+                  png_ptr->dither_index[i] = (png_byte)j;
+               }
+            }
+
+            /* find closest color for those colors we are not using */
+            for (i = 0; i < num_palette; i++)
+            {
+               if ((int)png_ptr->dither_index[i] >= maximum_colors)
+               {
+                  int min_d, k, min_k, d_index;
+
+                  /* find the closest color to one we threw out */
+                  d_index = png_ptr->dither_index[i];
+                  min_d = PNG_COLOR_DIST(palette[d_index], palette[0]);
+                  for (k = 1, min_k = 0; k < maximum_colors; k++)
+                  {
+                     int d;
+
+                     d = PNG_COLOR_DIST(palette[d_index], palette[k]);
+
+                     if (d < min_d)
+                     {
+                        min_d = d;
+                        min_k = k;
+                     }
+                  }
+                  /* point to closest color */
+                  png_ptr->dither_index[i] = (png_byte)min_k;
+               }
+            }
+         }
+         png_free(png_ptr, png_ptr->dither_sort);   /* scratch array only */
+         png_ptr->dither_sort=NULL;
+      }
+      else
+      {
+         /* This is much harder to do simply (and quickly).  Perhaps
+            we need to go through a median cut routine, but those
+            don't always behave themselves with only a few colors
+            as input.  So we will just find the closest two colors,
+            and throw out one of them (chosen somewhat randomly).
+            [We don't understand this at all, so if someone wants to
+             work on improving it, be our guest - AED, GRP]
+            */
+         int i;
+         int max_d;
+         int num_new_palette;
+         png_dsortp t;
+         png_dsortpp hash;
+
+         t=NULL;
+
+         /* initialize palette index arrays */
+         png_ptr->index_to_palette = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+         png_ptr->palette_to_index = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+
+         /* initialize the sort array */
+         for (i = 0; i < num_palette; i++)
+         {
+            png_ptr->index_to_palette[i] = (png_byte)i;
+            png_ptr->palette_to_index[i] = (png_byte)i;
+         }
+
+         hash = (png_dsortpp)png_malloc(png_ptr, (png_uint_32)(769 *
+            png_sizeof (png_dsortp)));
+         for (i = 0; i < 769; i++)   /* one chain head per distance value */
+            hash[i] = NULL;
+/*         png_memset(hash, 0, 769 * png_sizeof (png_dsortp)); */
+
+         num_new_palette = num_palette;
+
+         /* initial wild guess at how far apart the farthest pixel
+            pair we will be eliminating will be.  Larger
+            numbers mean more areas will be allocated, Smaller
+            numbers run the risk of not saving enough data, and
+            having to do this all over again.
+
+            I have not done extensive checking on this number.
+            */
+         max_d = 96;
+
+         while (num_new_palette > maximum_colors)
+         {
+            for (i = 0; i < num_new_palette - 1; i++)
+            {
+               int j;
+
+               for (j = i + 1; j < num_new_palette; j++)
+               {
+                  int d;
+
+                  d = PNG_COLOR_DIST(palette[i], palette[j]);
+
+                  if (d <= max_d)   /* close enough: chain pair under hash[d] */
+                  {
+
+                     t = (png_dsortp)png_malloc_warn(png_ptr,
+                         (png_uint_32)(png_sizeof(png_dsort)));
+                     if (t == NULL)   /* allocation failed: stop collecting */
+                         break;
+                     t->next = hash[d];
+                     t->left = (png_byte)i;
+                     t->right = (png_byte)j;
+                     hash[d] = t;
+                  }
+               }
+               if (t == NULL)
+                  break;
+            }
+
+            if (t != NULL)
+            for (i = 0; i <= max_d; i++)   /* NOTE(review): max_d grows by 96 per pass but hash has 769 slots - confirm i stays below 769 */
+            {
+               if (hash[i] != NULL)
+               {
+                  png_dsortp p;
+
+                  for (p = hash[i]; p; p = p->next)
+                  {
+                     if ((int)png_ptr->index_to_palette[p->left]
+                        < num_new_palette &&
+                        (int)png_ptr->index_to_palette[p->right]
+                        < num_new_palette)
+                     {
+                        int j, next_j;
+
+                        if (num_new_palette & 0x01)   /* parity picks which of the pair is dropped */
+                        {
+                           j = p->left;
+                           next_j = p->right;
+                        }
+                        else
+                        {
+                           j = p->right;
+                           next_j = p->left;
+                        }
+
+                        num_new_palette--;
+                        palette[png_ptr->index_to_palette[j]]
+                          = palette[num_new_palette];
+                        if (!full_dither)
+                        {
+                           int k;
+
+                           for (k = 0; k < num_palette; k++)
+                           {
+                              if (png_ptr->dither_index[k] ==
+                                 png_ptr->index_to_palette[j])
+                                 png_ptr->dither_index[k] =
+                                    png_ptr->index_to_palette[next_j];
+                              if ((int)png_ptr->dither_index[k] ==
+                                 num_new_palette)
+                                 png_ptr->dither_index[k] =
+                                    png_ptr->index_to_palette[j];
+                           }
+                        }
+
+                        png_ptr->index_to_palette[png_ptr->palette_to_index
+                           [num_new_palette]] = png_ptr->index_to_palette[j];
+                        png_ptr->palette_to_index[png_ptr->index_to_palette[j]]
+                           = png_ptr->palette_to_index[num_new_palette];
+
+                        png_ptr->index_to_palette[j] = (png_byte)num_new_palette;
+                        png_ptr->palette_to_index[num_new_palette] = (png_byte)j;
+                     }
+                     if (num_new_palette <= maximum_colors)
+                        break;
+                  }
+                  if (num_new_palette <= maximum_colors)
+                     break;
+               }
+            }
+
+            for (i = 0; i < 769; i++)   /* release every chain built in this pass */
+            {
+               if (hash[i] != NULL)
+               {
+                  png_dsortp p = hash[i];
+                  while (p)
+                  {
+                     t = p->next;
+                     png_free(png_ptr, p);
+                     p = t;
+                  }
+               }
+               hash[i] = 0;
+            }
+            max_d += 96;   /* widen the distance threshold for the next pass */
+         }
+         png_free(png_ptr, hash);
+         png_free(png_ptr, png_ptr->palette_to_index);
+         png_free(png_ptr, png_ptr->index_to_palette);
+         png_ptr->palette_to_index=NULL;
+         png_ptr->index_to_palette=NULL;
+      }
+      num_palette = maximum_colors;
+   }
+   if (png_ptr->palette == NULL)   /* an already-set palette pointer is kept */
+   {
+      png_ptr->palette = palette;
+   }
+   png_ptr->num_palette = (png_uint_16)num_palette;
+
+   if (full_dither)   /* build the quantized-RGB -> palette index table */
+   {
+      int i;
+      png_bytep distance;
+      int total_bits = PNG_DITHER_RED_BITS + PNG_DITHER_GREEN_BITS +
+         PNG_DITHER_BLUE_BITS;
+      int num_red = (1 << PNG_DITHER_RED_BITS);
+      int num_green = (1 << PNG_DITHER_GREEN_BITS);
+      int num_blue = (1 << PNG_DITHER_BLUE_BITS);
+      png_size_t num_entries = ((png_size_t)1 << total_bits);
+
+      png_ptr->palette_lookup = (png_bytep )png_malloc(png_ptr,
+         (png_uint_32)(num_entries * png_sizeof (png_byte)));
+
+      png_memset(png_ptr->palette_lookup, 0, num_entries *
+         png_sizeof (png_byte));
+
+      distance = (png_bytep)png_malloc(png_ptr, (png_uint_32)(num_entries *
+         png_sizeof(png_byte)));
+
+      png_memset(distance, 0xff, num_entries * png_sizeof(png_byte));   /* every cell starts at 0xff */
+
+      for (i = 0; i < num_palette; i++)
+      {
+         int ir, ig, ib;
+         int r = (palette[i].red >> (8 - PNG_DITHER_RED_BITS));
+         int g = (palette[i].green >> (8 - PNG_DITHER_GREEN_BITS));
+         int b = (palette[i].blue >> (8 - PNG_DITHER_BLUE_BITS));
+
+         for (ir = 0; ir < num_red; ir++)
+         {
+            /* int dr = abs(ir - r); */
+            int dr = ((ir > r) ? ir - r : r - ir);
+            int index_r = (ir << (PNG_DITHER_BLUE_BITS + PNG_DITHER_GREEN_BITS));
+
+            for (ig = 0; ig < num_green; ig++)
+            {
+               /* int dg = abs(ig - g); */
+               int dg = ((ig > g) ? ig - g : g - ig);
+               int dt = dr + dg;
+               int dm = ((dr > dg) ? dr : dg);
+               int index_g = index_r | (ig << PNG_DITHER_BLUE_BITS);
+
+               for (ib = 0; ib < num_blue; ib++)
+               {
+                  int d_index = index_g | ib;
+                  /* int db = abs(ib - b); */
+                  int db = ((ib > b) ? ib - b : b - ib);
+                  int dmax = ((dm > db) ? dm : db);
+                  int d = dmax + dt + db;   /* dr + dg + db plus the largest of the three */
+
+                  if (d < (int)distance[d_index])   /* entry i is the closest so far */
+                  {
+                     distance[d_index] = (png_byte)d;
+                     png_ptr->palette_lookup[d_index] = (png_byte)i;
+                  }
+               }
+            }
+         }
+      }
+
+      png_free(png_ptr, distance);
+   }
+}
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Transform the image from the file_gamma to the screen_gamma.  We
+ * only do transformations on images where the file_gamma and screen_gamma
+ * are not close reciprocals, otherwise it slows things down slightly, and
+ * also needlessly introduces small errors.
+ *
+ * We will turn off gamma transformation later if no semitransparent entries
+ * are present in the tRNS array for palette images.  We can't do it here
+ * because we don't necessarily have the tRNS chunk yet.
+ */
+void PNGAPI
+png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma)
+{
+   png_debug(1, "in png_set_gamma\n");
+   if (png_ptr == NULL) return;
+   png_ptr->gamma = (float)file_gamma;
+   png_ptr->screen_gamma = (float)scrn_gamma;
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
+       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) ||
+       fabs(scrn_gamma * file_gamma - 1.0) > PNG_GAMMA_THRESHOLD)
+      png_ptr->transformations |= PNG_GAMMA;   /* gamma pass is needed */
+}
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Expand paletted images to RGB, expand grayscale images of
+ * less than 8-bit depth to 8-bit depth, and expand tRNS chunks
+ * to alpha channels.
+ */
+void PNGAPI
+png_set_expand(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_expand\n");
+   if (!png_ptr) return;   /* no read struct: nothing to configure */
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+   png_ptr->transformations |= PNG_EXPAND | PNG_EXPAND_tRNS;
+}
+
+/* GRR 19990627:  the following three functions currently are identical
+ *  to png_set_expand().  However, it is entirely reasonable that someone
+ *  might wish to expand an indexed image to RGB but *not* expand a single,
+ *  fully transparent palette entry to a full alpha channel--perhaps instead
+ *  convert tRNS to the grayscale/RGB format (16-bit RGB value), or replace
+ *  the transparent color with a particular RGB value, or drop tRNS entirely.
+ *  IOW, a future version of the library may make the transformations flag
+ *  a bit more fine-grained, with separate bits for each of these three
+ *  functions.
+ *
+ *  More to the point, these functions make it obvious what libpng will be
+ *  doing, whereas "expand" can (and does) mean any number of things.
+ *
+ *  GRP 20060307: In libpng-1.4.0, png_set_gray_1_2_4_to_8() was modified
+ *  to expand only the sample depth but not to expand the tRNS to alpha.
+ */
+
+/* Expand paletted images to RGB. */
+void PNGAPI
+png_set_palette_to_rgb(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_palette_to_rgb\n");
+   if (!png_ptr) return;   /* no read struct: nothing to configure */
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+   png_ptr->transformations |= PNG_EXPAND | PNG_EXPAND_tRNS;
+}
+
+#if !defined(PNG_1_0_X)
+/* Expand grayscale images of less than 8-bit depth to 8 bits. */
+void PNGAPI
+png_set_expand_gray_1_2_4_to_8(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_expand_gray_1_2_4_to_8\n");
+   if (!png_ptr) return;   /* no read struct: nothing to configure */
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+   png_ptr->transformations |= PNG_EXPAND;   /* PNG_EXPAND_tRNS is not set */
+}
+#endif
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Expand grayscale images of less than 8-bit depth to 8 bits. */
+/* Deprecated as of libpng-1.2.9 */
+void PNGAPI
+png_set_gray_1_2_4_to_8(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_gray_1_2_4_to_8\n");
+   if (png_ptr != NULL)   /* deprecated entry point; sets both expand bits */
+      png_ptr->transformations |= PNG_EXPAND | PNG_EXPAND_tRNS;
+}
+#endif
+
+
+/* Expand tRNS chunks to alpha channels. */
+void PNGAPI png_set_tRNS_to_alpha(png_structp png_ptr)
+{  /* Request expansion of tRNS data into an alpha channel. */
+   png_debug(1, "in png_set_tRNS_to_alpha\n");
+   if(png_ptr == NULL) return;   /* guard added: png_ptr is dereferenced below */
+   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+}
+#endif /* defined(PNG_READ_EXPAND_SUPPORTED) */
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+void PNGAPI png_set_gray_to_rgb(png_structp png_ptr)
+{  /* Request the PNG_GRAY_TO_RGB read transformation. */
+   png_debug(1, "in png_set_gray_to_rgb\n");
+   if(png_ptr == NULL) return;   /* guard added: png_ptr is dereferenced below */
+   png_ptr->transformations |= PNG_GRAY_TO_RGB;
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+#if defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Convert a RGB image to a grayscale of the same width.  This allows us,
+ * for example, to convert a 24 bpp RGB image into an 8 bpp grayscale image.
+ */
+
+void PNGAPI
+png_set_rgb_to_gray(png_structp png_ptr, int error_action, double red,
+   double green)
+{
+   int r_fixed = (int)((float)red*100000.0 + 0.5);     /* weight * 100000 */
+   int g_fixed = (int)((float)green*100000.0 + 0.5);   /* weight * 100000 */
+   if (png_ptr != NULL)   /* delegate to the fixed-point variant */
+      png_set_rgb_to_gray_fixed(png_ptr, error_action, r_fixed, g_fixed);
+}
+#endif
+
+void PNGAPI
+png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
+   png_fixed_point red, png_fixed_point green)
+{
+   /* red/green are channel weights scaled by 100000; the blue weight is
+    * whatever remains.  Negative weights select the built-in defaults. */
+   png_debug(1, "in png_set_rgb_to_gray\n");
+   if(png_ptr == NULL) return;
+   switch(error_action)
+   {
+      case 1: png_ptr->transformations |= PNG_RGB_TO_GRAY;      break;
+      case 2: png_ptr->transformations |= PNG_RGB_TO_GRAY_WARN; break;
+      case 3: png_ptr->transformations |= PNG_RGB_TO_GRAY_ERR;
+   }
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+      png_ptr->transformations |= PNG_EXPAND;
+#else
+   {
+      png_warning(png_ptr, "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED.");
+      png_ptr->transformations &= ~PNG_RGB_TO_GRAY;
+   }
+#endif
+   {
+      png_uint_16 red_int, green_int;
+      if(red < 0 || green < 0)
+      {
+         red_int   =  6968; /* .212671 * 32768 + .5 */
+         green_int = 23434; /* .715160 * 32768 + .5 */
+      }
+      else if(red <= 100000L && green <= 100000L - red)
+      {  /* weights sum to at most 1.0; tested without adding red + green */
+        red_int = (png_uint_16)(((png_uint_32)red*32768L)/100000L);
+        green_int = (png_uint_16)(((png_uint_32)green*32768L)/100000L);
+      }
+      else
+      {
+         png_warning(png_ptr, "ignoring out of range rgb_to_gray coefficients");
+         red_int   =  6968;
+         green_int = 23434;
+      }
+      png_ptr->rgb_to_gray_red_coeff   = red_int;
+      png_ptr->rgb_to_gray_green_coeff = green_int;
+      png_ptr->rgb_to_gray_blue_coeff  = (png_uint_16)(32768-red_int-green_int);
+   }
+}
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+void PNGAPI
+png_set_read_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
+   read_user_transform_fn)
+{
+   png_debug(1, "in png_set_read_user_transform_fn\n");
+   if (png_ptr == NULL) return;
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   png_ptr->read_user_transform_fn = read_user_transform_fn;
+   png_ptr->transformations |= PNG_USER_TRANSFORM;
+#endif
+#ifdef PNG_LEGACY_SUPPORTED
+   if (read_user_transform_fn != NULL)   /* legacy builds only warn */
+      png_warning(png_ptr,
+        "This version of libpng does not support user transforms");
+#endif
+}
+#endif
+
+/* Initialize everything needed for the read.  This includes modifying
+ * the palette (background compositing, gamma correction, sig_bit shift).
+ */
+void /* PRIVATE */
+png_init_read_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_init_read_transformations\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if(png_ptr != NULL)
+#endif
+  { /* function body; guarded by the NULL test only in USELESS_TESTS builds */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || defined(PNG_READ_SHIFT_SUPPORTED) \
+ || defined(PNG_READ_GAMMA_SUPPORTED)
+   int color_type = png_ptr->color_type;
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* Detect gray background and attempt to enable optimization
+    * for gray --> RGB case */
+   /* Note:  if PNG_BACKGROUND_EXPAND is set and color_type is either RGB or
+    * RGB_ALPHA (in which case need_expand is superfluous anyway), the
+    * background color might actually be gray yet not be flagged as such.
+    * This is not a problem for the current code, which uses
+    * PNG_BACKGROUND_IS_GRAY only to decide when to do the
+    * png_do_gray_to_rgb() transformation.
+    */
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+       !(color_type & PNG_COLOR_MASK_COLOR))
+   {
+          png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
+   } else if ((png_ptr->transformations & PNG_BACKGROUND) &&
+              !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+              (png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+              png_ptr->background.red == png_ptr->background.green &&
+              png_ptr->background.red == png_ptr->background.blue)
+   {
+          png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
+          png_ptr->background.gray = png_ptr->background.red;
+   }
+#endif
+
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+       (png_ptr->transformations & PNG_EXPAND))
+   {
+      if (!(color_type & PNG_COLOR_MASK_COLOR))  /* i.e., GRAY or GRAY_ALPHA */
+      {
+         /* expand background and tRNS chunks */
+         switch (png_ptr->bit_depth)
+         {
+            case 1: /* 0..1 scaled to 0..255 */
+               png_ptr->background.gray *= (png_uint_16)0xff;
+               png_ptr->background.red = png_ptr->background.green
+                 =  png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               { /* also scale the tRNS gray value the same way */
+                 png_ptr->trans_values.gray *= (png_uint_16)0xff;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 2: /* 0..3 scaled to 0..255 */
+               png_ptr->background.gray *= (png_uint_16)0x55;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               {
+                 png_ptr->trans_values.gray *= (png_uint_16)0x55;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 4: /* 0..15 scaled to 0..255 */
+               png_ptr->background.gray *= (png_uint_16)0x11;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               {
+                 png_ptr->trans_values.gray *= (png_uint_16)0x11;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 8:
+            case 16: /* no scaling: only copy gray into the r/g/b fields */
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               break;
+         }
+      }
+      else if (color_type == PNG_COLOR_TYPE_PALETTE)
+      { /* NOTE(review): background.index is not range-checked here - confirm */
+         png_ptr->background.red   =
+            png_ptr->palette[png_ptr->background.index].red;
+         png_ptr->background.green =
+            png_ptr->palette[png_ptr->background.index].green;
+         png_ptr->background.blue  =
+            png_ptr->palette[png_ptr->background.index].blue;
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+        if (png_ptr->transformations & PNG_INVERT_ALPHA)
+        {
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+           if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+#endif
+           {
+           /* invert the alpha channel (in tRNS) unless the pixels are
+              going to be expanded, in which case leave it for later */
+              int i,istop;
+              istop=(int)png_ptr->num_trans;
+              for (i=0; i<istop; i++)
+                 png_ptr->trans[i] = (png_byte)(255 - png_ptr->trans[i]);
+           }
+        }
+#endif
+
+      }
+   }
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+   png_ptr->background_1 = png_ptr->background; /* start as a plain copy */
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+
+   if ((color_type == PNG_COLOR_TYPE_PALETTE && png_ptr->num_trans != 0)
+       && (fabs(png_ptr->screen_gamma * png_ptr->gamma - 1.0)
+         < PNG_GAMMA_THRESHOLD))
+   { /* palette + tRNS with combined gamma close to 1.0 */
+    int i,k;
+    k=0;
+    for (i=0; i<png_ptr->num_trans; i++)
+    {
+      if (png_ptr->trans[i] != 0 && png_ptr->trans[i] != 0xff)
+        k=1; /* partial transparency is present */
+    }
+    if (k == 0) /* every entry is fully opaque or fully transparent */
+      png_ptr->transformations &= ~PNG_GAMMA;
+   }
+
+   if ((png_ptr->transformations & (PNG_GAMMA | PNG_RGB_TO_GRAY)) &&
+        png_ptr->gamma != 0.0)
+   {
+      png_build_gamma_table(png_ptr); /* presumably fills the gamma_* tables */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+      if (png_ptr->transformations & PNG_BACKGROUND)
+      {
+         if (color_type == PNG_COLOR_TYPE_PALETTE)
+         {
+           /* could skip if no transparency
+           */
+            png_color back, back_1;
+            png_colorp palette = png_ptr->palette;
+            int num_palette = png_ptr->num_palette;
+            int i;
+            if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
+            { /* background shares the file gamma: use the tables directly */
+               back.red = png_ptr->gamma_table[png_ptr->background.red];
+               back.green = png_ptr->gamma_table[png_ptr->background.green];
+               back.blue = png_ptr->gamma_table[png_ptr->background.blue];
+
+               back_1.red = png_ptr->gamma_to_1[png_ptr->background.red];
+               back_1.green = png_ptr->gamma_to_1[png_ptr->background.green];
+               back_1.blue = png_ptr->gamma_to_1[png_ptr->background.blue];
+            }
+            else
+            {
+               double g, gs; /* exponents used for back_1 and back below */
+
+               switch (png_ptr->background_gamma_type)
+               {
+                  case PNG_BACKGROUND_GAMMA_SCREEN:
+                     g = (png_ptr->screen_gamma);
+                     gs = 1.0;
+                     break;
+                  case PNG_BACKGROUND_GAMMA_FILE: /* not reached: see if above */
+                     g = 1.0 / (png_ptr->gamma);
+                     gs = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+                     break;
+                  case PNG_BACKGROUND_GAMMA_UNIQUE:
+                     g = 1.0 / (png_ptr->background_gamma);
+                     gs = 1.0 / (png_ptr->background_gamma *
+                                 png_ptr->screen_gamma);
+                     break;
+                  default:
+                     g = 1.0;    /* back_1 */
+                     gs = 1.0;   /* back */
+               }
+
+               if ( fabs(gs - 1.0) < PNG_GAMMA_THRESHOLD)
+               { /* exponent is effectively 1: copy without calling pow() */
+                  back.red   = (png_byte)png_ptr->background.red;
+                  back.green = (png_byte)png_ptr->background.green;
+                  back.blue  = (png_byte)png_ptr->background.blue;
+               }
+               else
+               {
+                  back.red = (png_byte)(pow(
+                     (double)png_ptr->background.red/255, gs) * 255.0 + .5);
+                  back.green = (png_byte)(pow(
+                     (double)png_ptr->background.green/255, gs) * 255.0 + .5);
+                  back.blue = (png_byte)(pow(
+                     (double)png_ptr->background.blue/255, gs) * 255.0 + .5);
+               }
+
+               back_1.red = (png_byte)(pow(
+                  (double)png_ptr->background.red/255, g) * 255.0 + .5);
+               back_1.green = (png_byte)(pow(
+                  (double)png_ptr->background.green/255, g) * 255.0 + .5);
+               back_1.blue = (png_byte)(pow(
+                  (double)png_ptr->background.blue/255, g) * 255.0 + .5);
+            }
+            for (i = 0; i < num_palette; i++)
+            {
+               if (i < (int)png_ptr->num_trans && png_ptr->trans[i] != 0xff)
+               { /* entry has a tRNS alpha that is not fully opaque */
+                  if (png_ptr->trans[i] == 0)
+                  {
+                     palette[i] = back; /* fully transparent: background */
+                  }
+                  else /* if (png_ptr->trans[i] != 0xff) */
+                  {
+                     png_byte v, w; /* v: gamma_to_1 value, w: composite */
+
+                     v = png_ptr->gamma_to_1[palette[i].red];
+                     png_composite(w, v, png_ptr->trans[i], back_1.red);
+                     palette[i].red = png_ptr->gamma_from_1[w];
+
+                     v = png_ptr->gamma_to_1[palette[i].green];
+                     png_composite(w, v, png_ptr->trans[i], back_1.green);
+                     palette[i].green = png_ptr->gamma_from_1[w];
+
+                     v = png_ptr->gamma_to_1[palette[i].blue];
+                     png_composite(w, v, png_ptr->trans[i], back_1.blue);
+                     palette[i].blue = png_ptr->gamma_from_1[w];
+                  }
+               }
+               else
+               { /* opaque entry: gamma table lookup only */
+                  palette[i].red = png_ptr->gamma_table[palette[i].red];
+                  palette[i].green = png_ptr->gamma_table[palette[i].green];
+                  palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+               }
+            }
+	    /* Prevent the transformations being done again, and make sure
+	     * that the now spurious alpha channel is stripped - the code
+	     * has just reduced background composition and gamma correction
+	     * to a simple alpha channel strip.
+	     */
+	    png_ptr->transformations &= ~PNG_BACKGROUND;
+	    png_ptr->transformations &= ~PNG_GAMMA;
+	    png_ptr->transformations |= PNG_STRIP_ALPHA;
+         }
+         /* if (png_ptr->background_gamma_type!=PNG_BACKGROUND_GAMMA_UNKNOWN) */
+         else
+         /* color_type != PNG_COLOR_TYPE_PALETTE */
+         {
+            double m = (double)(((png_uint_32)1 << png_ptr->bit_depth) - 1);
+            double g = 1.0; /* exponent for background_1; m is 2^depth - 1 */
+            double gs = 1.0; /* exponent for background */
+
+            switch (png_ptr->background_gamma_type)
+            {
+               case PNG_BACKGROUND_GAMMA_SCREEN:
+                  g = (png_ptr->screen_gamma);
+                  gs = 1.0;
+                  break;
+               case PNG_BACKGROUND_GAMMA_FILE:
+                  g = 1.0 / (png_ptr->gamma);
+                  gs = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+                  break;
+               case PNG_BACKGROUND_GAMMA_UNIQUE:
+                  g = 1.0 / (png_ptr->background_gamma);
+                  gs = 1.0 / (png_ptr->background_gamma *
+                     png_ptr->screen_gamma);
+                  break;
+            } /* any other type keeps g = gs = 1.0 */
+
+            png_ptr->background_1.gray = (png_uint_16)(pow(
+               (double)png_ptr->background.gray / m, g) * m + .5);
+            png_ptr->background.gray = (png_uint_16)(pow(
+               (double)png_ptr->background.gray / m, gs) * m + .5);
+
+            if ((png_ptr->background.red != png_ptr->background.green) ||
+                (png_ptr->background.red != png_ptr->background.blue) ||
+                (png_ptr->background.red != png_ptr->background.gray))
+            {
+               /* RGB or RGBA with color background */
+               png_ptr->background_1.red = (png_uint_16)(pow(
+                  (double)png_ptr->background.red / m, g) * m + .5);
+               png_ptr->background_1.green = (png_uint_16)(pow(
+                  (double)png_ptr->background.green / m, g) * m + .5);
+               png_ptr->background_1.blue = (png_uint_16)(pow(
+                  (double)png_ptr->background.blue / m, g) * m + .5);
+               png_ptr->background.red = (png_uint_16)(pow(
+                  (double)png_ptr->background.red / m, gs) * m + .5);
+               png_ptr->background.green = (png_uint_16)(pow(
+                  (double)png_ptr->background.green / m, gs) * m + .5);
+               png_ptr->background.blue = (png_uint_16)(pow(
+                  (double)png_ptr->background.blue / m, gs) * m + .5);
+            }
+            else
+            {
+               /* GRAY, GRAY ALPHA, RGB, or RGBA with gray background */
+               png_ptr->background_1.red = png_ptr->background_1.green
+                 = png_ptr->background_1.blue = png_ptr->background_1.gray;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+            }
+         }
+      }
+      else
+      /* transformation does not include PNG_BACKGROUND */
+#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+      if (color_type == PNG_COLOR_TYPE_PALETTE)
+      { /* gamma-correct every palette entry up front */
+         png_colorp palette = png_ptr->palette;
+         int num_palette = png_ptr->num_palette;
+         int i;
+
+         for (i = 0; i < num_palette; i++)
+         {
+            palette[i].red = png_ptr->gamma_table[palette[i].red];
+            palette[i].green = png_ptr->gamma_table[palette[i].green];
+            palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+         }
+
+	 /* Done the gamma correction. */
+	 png_ptr->transformations &= ~PNG_GAMMA;
+      }
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   else /* pairs with the gamma "if" above; its body is the "if" below */
+#endif
+#endif /* PNG_READ_GAMMA_SUPPORTED && PNG_FLOATING_POINT_SUPPORTED */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* No GAMMA transformation */
+   if ((png_ptr->transformations & PNG_BACKGROUND) &&
+       (color_type == PNG_COLOR_TYPE_PALETTE))
+   { /* composite the palette against the background without gamma */
+      int i;
+      int istop = (int)png_ptr->num_trans;
+      png_color back;
+      png_colorp palette = png_ptr->palette;
+
+      back.red   = (png_byte)png_ptr->background.red;
+      back.green = (png_byte)png_ptr->background.green;
+      back.blue  = (png_byte)png_ptr->background.blue;
+
+      for (i = 0; i < istop; i++)
+      {
+         if (png_ptr->trans[i] == 0)
+         {
+            palette[i] = back;
+         }
+         else if (png_ptr->trans[i] != 0xff)
+         {
+            /* The png_composite() macro is defined in png.h */
+            png_composite(palette[i].red, palette[i].red,
+               png_ptr->trans[i], back.red);
+            png_composite(palette[i].green, palette[i].green,
+               png_ptr->trans[i], back.green);
+            png_composite(palette[i].blue, palette[i].blue,
+               png_ptr->trans[i], back.blue);
+         }
+      }
+
+      /* Handled alpha, still need to strip the channel. */
+      png_ptr->transformations &= ~PNG_BACKGROUND;
+      png_ptr->transformations |= PNG_STRIP_ALPHA;
+   }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   if ((png_ptr->transformations & PNG_SHIFT) &&
+      (color_type == PNG_COLOR_TYPE_PALETTE))
+   { /* shift palette entries right by (8 - sig_bit) per channel */
+      png_uint_16 i;
+      png_uint_16 istop = png_ptr->num_palette;
+      int sr = 8 - png_ptr->sig_bit.red;
+      int sg = 8 - png_ptr->sig_bit.green;
+      int sb = 8 - png_ptr->sig_bit.blue;
+
+      if (sr < 0 || sr > 8) /* out-of-range shift counts become no shift */
+         sr = 0;
+      if (sg < 0 || sg > 8)
+         sg = 0;
+      if (sb < 0 || sb > 8)
+         sb = 0;
+      for (i = 0; i < istop; i++)
+      {
+         png_ptr->palette[i].red >>= sr;
+         png_ptr->palette[i].green >>= sg;
+         png_ptr->palette[i].blue >>= sb;
+      }
+   }
+#endif  /* PNG_READ_SHIFT_SUPPORTED */
+ }
+#if !defined(PNG_READ_GAMMA_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED) \
+ && !defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if(png_ptr) /* presumably only here so png_ptr is referenced - confirm */
+      return;
+#endif
+}
+
+/* Modify the info structure to reflect the transformations.  The
+ * info should be updated so a PNG file could be written with it,
+ * assuming the transformations result in valid PNG data.
+ */
+void /* PRIVATE */
+png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_read_transform_info\n");
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      { /* palette becomes 8-bit RGB, or RGBA when tRNS is expanded */
+         if (png_ptr->num_trans &&
+              (png_ptr->transformations & PNG_EXPAND_tRNS))
+            info_ptr->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+         else
+            info_ptr->color_type = PNG_COLOR_TYPE_RGB;
+         info_ptr->bit_depth = 8;
+         info_ptr->num_trans = 0;
+      }
+      else
+      {
+         if (png_ptr->num_trans)
+         {
+            if (png_ptr->transformations & PNG_EXPAND_tRNS)
+              info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
+#if 0 /* Removed from libpng-1.2.27 */
+            else
+              info_ptr->color_type |= PNG_COLOR_MASK_COLOR;
+#endif
+         }
+         if (info_ptr->bit_depth < 8) /* sub-byte depths are reported as 8 */
+            info_ptr->bit_depth = 8;
+         info_ptr->num_trans = 0;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->transformations & PNG_BACKGROUND)
+   { /* alpha is composited away, so report no alpha and no tRNS */
+      info_ptr->color_type &= ~PNG_COLOR_MASK_ALPHA;
+      info_ptr->num_trans = 0;
+      info_ptr->background = png_ptr->background;
+   }
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->transformations & PNG_GAMMA)
+   { /* copy the gamma value(s) from the read struct into the info struct */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      info_ptr->gamma = png_ptr->gamma;
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      info_ptr->int_gamma = png_ptr->int_gamma;
+#endif
+   }
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   if ((png_ptr->transformations & PNG_16_TO_8) && (info_ptr->bit_depth == 16))
+      info_ptr->bit_depth = 8;
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+      info_ptr->color_type |= PNG_COLOR_MASK_COLOR;
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+      info_ptr->color_type &= ~PNG_COLOR_MASK_COLOR;
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   if (png_ptr->transformations & PNG_DITHER)
+   { /* 8-bit RGB/RGBA with a palette_lookup table is reported as palette */
+      if (((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
+         (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)) &&
+         png_ptr->palette_lookup && info_ptr->bit_depth == 8)
+      {
+         info_ptr->color_type = PNG_COLOR_TYPE_PALETTE;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACK) && (info_ptr->bit_depth < 8))
+      info_ptr->bit_depth = 8;
+#endif
+
+   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) /* derive channels */
+      info_ptr->channels = 1;
+   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      info_ptr->channels = 3;
+   else
+      info_ptr->channels = 1;
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_STRIP_ALPHA) /* tested in flags, not transformations */
+      info_ptr->color_type &= ~PNG_COLOR_MASK_ALPHA;
+#endif
+
+   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
+      info_ptr->channels++; /* one extra channel for alpha */
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   /* STRIP_ALPHA and FILLER allowed:  MASK_ALPHA bit stripped above */
+   if ((png_ptr->transformations & PNG_FILLER) &&
+       ((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
+       (info_ptr->color_type == PNG_COLOR_TYPE_GRAY)))
+   {
+      info_ptr->channels++;
+      /* if adding a true alpha channel not just filler */
+#if !defined(PNG_1_0_X)
+      if (png_ptr->transformations & PNG_ADD_ALPHA)
+        info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
+#endif
+   }
+#endif
+
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \
+defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   if(png_ptr->transformations & PNG_USER_TRANSFORM)
+     { /* user-declared depth/channels can only raise the reported values */
+       if(info_ptr->bit_depth < png_ptr->user_transform_depth)
+         info_ptr->bit_depth = png_ptr->user_transform_depth;
+       if(info_ptr->channels < png_ptr->user_transform_channels)
+         info_ptr->channels = png_ptr->user_transform_channels;
+     }
+#endif
+
+   info_ptr->pixel_depth = (png_byte)(info_ptr->channels *
+      info_ptr->bit_depth); /* bits per pixel */
+
+   info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth,info_ptr->width);
+
+#if !defined(PNG_READ_EXPAND_SUPPORTED)
+   if(png_ptr) /* presumably only here so png_ptr is referenced - confirm */
+      return;
+#endif
+}
+
+/* Transform the row.  The order of transformations is significant,
+ * and is very touchy.  If you add a transformation, take care to
+ * decide how it fits in with the other transformations here.
+ */
+void /* PRIVATE */
+png_do_read_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_do_read_transformations\n");
+   if (png_ptr->row_buf == NULL)
+   { /* no row buffer to transform: report through png_error() */
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char msg[50];
+
+      png_snprintf2(msg, 50,
+         "NULL row buffer for row %ld, pass %d", png_ptr->row_number,
+         png_ptr->pass);
+      png_error(png_ptr, msg);
+#else
+      png_error(png_ptr, "NULL row buffer");
+#endif
+   }
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
+      /* Application has failed to call either png_read_start_image()
+       * or png_read_update_info() after setting transforms that expand
+       * pixels.  This check added to libpng-1.2.19 */
+#if (PNG_WARN_UNINITIALIZED_ROW==1)
+      png_error(png_ptr, "Uninitialized row");
+#else
+      png_warning(png_ptr, "Uninitialized row");
+#endif
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (png_ptr->row_info.color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_do_expand_palette(&(png_ptr->row_info), png_ptr->row_buf + 1,
+            png_ptr->palette, png_ptr->trans, png_ptr->num_trans);
+      }
+      else
+      { /* pass trans_values only when tRNS exists and is to be expanded */
+         if (png_ptr->num_trans &&
+             (png_ptr->transformations & PNG_EXPAND_tRNS))
+            png_do_expand(&(png_ptr->row_info), png_ptr->row_buf + 1,
+               &(png_ptr->trans_values));
+         else
+            png_do_expand(&(png_ptr->row_info), png_ptr->row_buf + 1,
+               NULL);
+      }
+   }
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_STRIP_ALPHA) /* tested in flags, not transformations */
+      png_do_strip_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         PNG_FLAG_FILLER_AFTER | (png_ptr->flags & PNG_FLAG_STRIP_ALPHA));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+   {
+      int rgb_error =
+         png_do_rgb_to_gray(png_ptr, &(png_ptr->row_info), png_ptr->row_buf + 1);
+      if(rgb_error)
+      { /* record the status, then warn or error according to the mode bits */
+         png_ptr->rgb_to_gray_status=1;
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) == 
+             PNG_RGB_TO_GRAY_WARN)
+            png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
+             PNG_RGB_TO_GRAY_ERR)
+            png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+      }
+   }
+#endif
+
+/*
+From Andreas Dilger e-mail to png-implement, 26 March 1998:
+
+  In most cases, the "simple transparency" should be done prior to doing
+  gray-to-RGB, or you will have to test 3x as many bytes to check if a
+  pixel is transparent.  You would also need to make sure that the
+  transparency information is upgraded to RGB.
+
+  To summarize, the current flow is:
+  - Gray + simple transparency -> compare 1 or 2 gray bytes and composite
+                                  with background "in place" if transparent,
+                                  convert to RGB if necessary
+  - Gray + alpha -> composite with gray background and remove alpha bytes,
+                                  convert to RGB if necessary
+
+  To support RGB backgrounds for gray images we need:
+  - Gray + simple transparency -> convert to RGB + simple transparency, compare
+                                  3 or 6 bytes and composite with background
+                                  "in place" if transparent (3x compare/pixel
+                                  compared to doing composite with gray bkgrnd)
+  - Gray + alpha -> convert to RGB + alpha, composite with background and
+                                  remove alpha bytes (3x float operations/pixel
+                                  compared with composite on gray background)
+
+  Greg's change will do this.  The reason it wasn't done before is for
+  performance, as this increases the per-pixel operations.  If we would check
+  in advance if the background was gray or RGB, and position the gray-to-RGB
+  transform appropriately, then it would save a lot of work/time.
+ */
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* if gray -> RGB, do so now only if background is non-gray; else do later
+    * for performance reasons */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+       !(png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
+      png_do_gray_to_rgb(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if ((png_ptr->transformations & PNG_BACKGROUND) &&
+      ((png_ptr->num_trans != 0 ) ||
+      (png_ptr->color_type & PNG_COLOR_MASK_ALPHA)))
+      png_do_background(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->trans_values), &(png_ptr->background)
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+         , &(png_ptr->background_1),
+         png_ptr->gamma_table, png_ptr->gamma_from_1,
+         png_ptr->gamma_to_1, png_ptr->gamma_16_table,
+         png_ptr->gamma_16_from_1, png_ptr->gamma_16_to_1,
+         png_ptr->gamma_shift
+#endif
+);
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if ((png_ptr->transformations & PNG_GAMMA) &&
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+      !((png_ptr->transformations & PNG_BACKGROUND) &&
+      ((png_ptr->num_trans != 0) ||
+      (png_ptr->color_type & PNG_COLOR_MASK_ALPHA))) &&
+#endif
+      (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE))
+      png_do_gamma(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->gamma_table, png_ptr->gamma_16_table,
+         png_ptr->gamma_shift); /* skipped when png_do_background ran above */
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   if (png_ptr->transformations & PNG_16_TO_8)
+      png_do_chop(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   if (png_ptr->transformations & PNG_DITHER)
+   {
+      png_do_dither((png_row_infop)&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->palette_lookup, png_ptr->dither_index);
+      if(png_ptr->row_info.rowbytes == (png_uint_32)0) /* treated as fatal */
+         png_error(png_ptr, "png_do_dither returned rowbytes=0");
+   }
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_do_invert(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_do_unshift(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->shift));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_do_unpack(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_do_bgr(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_do_packswap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* if gray -> RGB, do so now only if we did not do so above */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
+      png_do_gray_to_rgb(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_do_read_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         (png_uint_32)png_ptr->filler, png_ptr->flags);
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_ALPHA)
+      png_do_read_invert_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_ALPHA)
+      png_do_read_swap_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_do_swap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   if (png_ptr->transformations & PNG_USER_TRANSFORM)
+    { /* the flag may be set with a NULL callback, hence the test below */
+      if(png_ptr->read_user_transform_fn != NULL)
+        (*(png_ptr->read_user_transform_fn)) /* user read transform function */
+          (png_ptr,                    /* png_ptr */
+           &(png_ptr->row_info),       /* row_info:     */
+             /*  png_uint_32 width;          width of row */
+             /*  png_uint_32 rowbytes;       number of bytes in row */
+             /*  png_byte color_type;        color type of pixels */
+             /*  png_byte bit_depth;         bit depth of samples */
+             /*  png_byte channels;          number of channels (1-4) */
+             /*  png_byte pixel_depth;       bits per pixel (depth*channels) */
+           png_ptr->row_buf + 1);      /* start of pixel data for row */
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+      if(png_ptr->user_transform_depth) /* nonzero value overrides row_info */
+         png_ptr->row_info.bit_depth = png_ptr->user_transform_depth;
+      if(png_ptr->user_transform_channels)
+         png_ptr->row_info.channels = png_ptr->user_transform_channels;
+#endif
+      png_ptr->row_info.pixel_depth = (png_byte)(png_ptr->row_info.bit_depth *
+         png_ptr->row_info.channels); /* recomputed after the callback */
+      png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+         png_ptr->row_info.width);
+   }
+#endif
+
+}
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel,
+ * without changing the actual values.  Thus, if you had a row with
+ * a bit depth of 1, you would end up with bytes that only contained
+ * the numbers 0 or 1.  If you would rather they contain 0 and 255, use
+ * png_do_shift() after this.
+ */
+void /* PRIVATE */
+png_do_unpack(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_unpack\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && row_info->bit_depth < 8)
+#else
+   if (row_info->bit_depth < 8)
+#endif
+   { /* rows of depth 8 or more are left untouched */
+      png_uint_32 i;
+      png_uint_32 row_width=row_info->width;
+
+      switch (row_info->bit_depth)
+      { /* each case walks from the last pixel to the first, in place */
+         case 1:
+         {
+            png_bytep sp = row + (png_size_t)((row_width - 1) >> 3);
+            png_bytep dp = row + (png_size_t)row_width - 1;
+            png_uint_32 shift = 7 - (int)((row_width + 7) & 0x07);
+            for (i = 0; i < row_width; i++) /* sp: packed byte, dp: output */
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x01);
+               if (shift == 7)
+               { /* top bit consumed: step back to the previous source byte */
+                  shift = 0;
+                  sp--;
+               }
+               else
+                  shift++;
+
+               dp--;
+            }
+            break;
+         }
+         case 2:
+         {
+
+            png_bytep sp = row + (png_size_t)((row_width - 1) >> 2);
+            png_bytep dp = row + (png_size_t)row_width - 1;
+            png_uint_32 shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
+            for (i = 0; i < row_width; i++) /* four 2-bit pixels per byte */
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x03);
+               if (shift == 6)
+               {
+                  shift = 0;
+                  sp--;
+               }
+               else
+                  shift += 2;
+
+               dp--;
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp = row + (png_size_t)((row_width - 1) >> 1);
+            png_bytep dp = row + (png_size_t)row_width - 1;
+            png_uint_32 shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
+            for (i = 0; i < row_width; i++) /* two 4-bit pixels per byte */
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x0f);
+               if (shift == 4)
+               {
+                  shift = 0;
+                  sp--;
+               }
+               else
+                  shift = 4;
+
+               dp--;
+            }
+            break;
+         }
+      }
+      row_info->bit_depth = 8; /* row_info now describes one byte per sample */
+      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
+      row_info->rowbytes = row_width * row_info->channels;
+   }
+}
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+/* Reverse the effects of png_do_shift: shift every sample in 'row' right,
+ * in place, by (bit_depth - significant bits of its channel in 'sig_bits').
+ * Thus a row of bit depth 8 with only 5 significant bits ends up holding
+ * values 0 through 31.  Palette rows and rows needing no shift are untouched.
+ */
+void /* PRIVATE */
+png_do_unshift(png_row_infop row_info, png_bytep row, png_color_8p sig_bits)
+{
+   png_debug(1, "in png_do_unshift\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL && sig_bits != NULL &&
+#endif
+       row_info->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      int shift[4]; /* right-shift count for each channel, in sample order */
+      int channels = 0; /* number of shift[] entries filled in below */
+      int c;
+      png_uint_16 value = 0; /* first a "some shift needed" flag, later a 16-bit sample */
+      png_uint_32 row_width = row_info->width;
+
+      if (row_info->color_type & PNG_COLOR_MASK_COLOR)
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->red;
+         shift[channels++] = row_info->bit_depth - sig_bits->green;
+         shift[channels++] = row_info->bit_depth - sig_bits->blue;
+      }
+      else
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->gray;
+      }
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->alpha;
+      }
+
+      for (c = 0; c < channels; c++)
+      {
+         if (shift[c] <= 0) /* sig bits >= bit depth: clamp, nothing to undo */
+            shift[c] = 0;
+         else
+            value = 1;
+      }
+
+      if (!value) /* no channel has a positive shift */
+         return;
+
+      switch (row_info->bit_depth) /* other depths (e.g. 1) fall through untouched */
+      {
+         case 2:
+         {
+            png_bytep bp;
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+
+            for (bp = row, i = 0; i < istop; i++)
+            {
+               *bp >>= 1; /* fixed shift of one bit; shift[0] is not consulted */
+               *bp++ &= 0x55; /* keep only the low bit of each 2-bit sample */
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+            /* mask drops the high-nibble bits that the shift pushes into the low nibble */
+            png_byte mask = (png_byte)((((int)0xf0 >> shift[0]) & (int)0xf0) |
+               (png_byte)((int)0xf >> shift[0]));
+
+            for (i = 0; i < istop; i++)
+            {
+               *bp >>= shift[0]; /* both nibbles of the byte use shift[0] */
+               *bp++ &= mask;
+            }
+            break;
+         }
+         case 8:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = row_width * channels; /* one byte per sample */
+
+            for (i = 0; i < istop; i++)
+            {
+               *bp++ >>= shift[i%channels]; /* samples are interleaved by channel */
+            }
+            break;
+         }
+         case 16:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = channels * row_width; /* two bytes per sample */
+
+            for (i = 0; i < istop; i++)
+            {
+               value = (png_uint_16)((*bp << 8) + *(bp + 1)); /* high byte first */
+               value >>= shift[i%channels];
+               *bp++ = (png_byte)(value >> 8);
+               *bp++ = (png_byte)(value & 0xff);
+            }
+            break;
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* Chop rows of bit depth 16 down to 8, in place, and update row_info to match */
+void /* PRIVATE */
+png_do_chop(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_chop\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && row_info->bit_depth == 16)
+#else
+   if (row_info->bit_depth == 16)
+#endif
+   {
+      png_bytep sp = row; /* read cursor: advances two bytes per sample */
+      png_bytep dp = row; /* write cursor: advances one byte per sample */
+      png_uint_32 i;
+      png_uint_32 istop = row_info->width * row_info->channels; /* samples in row */
+
+      for (i = 0; i<istop; i++, sp += 2, dp++)
+      {
+#if defined(PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED)
+      /* This does a more accurate scaling of the 16-bit color
+       * value, rather than a simple low-byte truncation.
+       *
+       * What the ideal calculation should be:
+       *   *dp = (((((png_uint_32)(*sp) << 8) |
+       *          (png_uint_32)(*(sp + 1))) * 255 + 127) / (png_uint_32)65535L;
+       *
+       * GRR: no, I think this is what it really should be:
+       *   *dp = (((((png_uint_32)(*sp) << 8) |
+       *           (png_uint_32)(*(sp + 1))) + 128L) / (png_uint_32)257L;
+       *
+       * GRR: here's the exact calculation with shifts:
+       *   temp = (((png_uint_32)(*sp) << 8) | (png_uint_32)(*(sp + 1))) + 128L;
+       *   *dp = (temp - (temp >> 8)) >> 8;
+       *
+       * Approximate calculation with shift/add instead of multiply/divide:
+       *   *dp = ((((png_uint_32)(*sp) << 8) |
+       *          (png_uint_32)((int)(*(sp + 1)) - *sp)) + 128) >> 8;
+       *
+       * What we actually do to avoid extra shifting and conversion:
+       */
+
+         *dp = *sp + ((((int)(*(sp + 1)) - *sp) > 128) ? 1 : 0); /* high byte, +1 if low exceeds high by more than 128 */
+#else
+       /* Simply discard the low order byte */
+         *dp = *sp;
+#endif
+      }
+      row_info->bit_depth = 8; /* describe the row as 8 bits per sample from now on */
+      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
+      row_info->rowbytes = row_info->width * row_info->channels;
+   }
+}
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+void /* PRIVATE */
+png_do_read_swap_alpha(png_row_infop row_info, png_bytep row)
+{ /* Move each pixel's alpha from last to first position (RGBA->ARGB, GA->AG), in place */
+   png_debug(1, "in png_do_read_swap_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_uint_32 row_width = row_info->width;
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* This converts from RGBA to ARGB */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes; /* one past the end: walk backwards */
+            png_bytep dp = sp;
+            png_byte save; /* alpha of the current pixel */
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               save = *(--sp); /* read alpha before its slot is overwritten */
+               *(--dp) = *(--sp); /* B, G, R each move up by one byte */
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save;
+            }
+         }
+         /* This converts from RRGGBBAA to AARRGGBB */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save[2]; /* [0] = second alpha byte, [1] = first alpha byte */
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               save[0] = *(--sp);
+               save[1] = *(--sp);
+               *(--dp) = *(--sp); /* six color bytes move up by two */
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save[0]; /* written backwards, so alpha byte order is kept */
+               *(--dp) = save[1];
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* This converts from GA to AG */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save; /* alpha of the current pixel */
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               save = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save;
+            }
+         }
+         /* This converts from GGAA to AAGG */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save[2]; /* [0] = second alpha byte, [1] = first alpha byte */
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               save[0] = *(--sp);
+               save[1] = *(--sp);
+               *(--dp) = *(--sp); /* two gray bytes move up by two */
+               *(--dp) = *(--sp);
+               *(--dp) = save[0];
+               *(--dp) = save[1];
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+void /* PRIVATE */
+png_do_read_invert_alpha(png_row_infop row_info, png_bytep row)
+{ /* Replace every alpha byte of an RGBA or GA row with 255 minus itself, in place */
+   png_debug(1, "in png_do_read_invert_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_uint_32 row_width = row_info->width;
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* This inverts the alpha channel in RGBA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes; /* one past the end: walk backwards */
+            png_bytep dp = sp; /* always equal to sp at the top of the loop */
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+
+/*             This does nothing:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               We can replace it with:
+*/
+               sp-=3; /* skip B, G, R */
+               dp=sp;
+            }
+         }
+         /* This inverts the alpha channel in RRGGBBAA */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp)); /* both alpha bytes are inverted */
+               *(--dp) = (png_byte)(255 - *(--sp));
+
+/*             This does nothing:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               We can replace it with:
+*/
+               sp-=6; /* skip the six color bytes */
+               dp=sp;
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* This inverts the alpha channel in GA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+               *(--dp) = *(--sp); /* sp == dp here: the gray byte is rewritten with itself */
+            }
+         }
+         /* This inverts the alpha channel in GGAA */
+         else
+         {
+            png_bytep sp  = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp)); /* both alpha bytes are inverted */
+               *(--dp) = (png_byte)(255 - *(--sp));
+/*             Copying the gray bytes onto themselves would do nothing:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               so they are skipped instead: */
+               sp-=2;
+               dp=sp;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+/* Add a filler channel to 8- or 16-bit gray or RGB rows (G->GX/XG, RGB->RGBX/XRGB) */
+void /* PRIVATE */
+png_do_read_filler(png_row_infop row_info, png_bytep row,
+   png_uint_32 filler, png_uint_32 flags)
+{
+   png_uint_32 i;
+   png_uint_32 row_width = row_info->width;
+   /* Rows are widened in place, back to front, so 'row' must have room for the new rowbytes. */
+   png_byte hi_filler = (png_byte)((filler>>8) & 0xff); /* used for 16-bit samples only */
+   png_byte lo_filler = (png_byte)(filler & 0xff);
+   /* 16-bit filler is stored high byte first, like the samples; other color types/depths are a no-op. */
+   png_debug(1, "in png_do_read_filler\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL  && row_info != NULL &&
+#endif
+       row_info->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      if(row_info->bit_depth == 8)
+      {
+         /* This changes the data from G to GX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width; /* end of the packed input */
+            png_bytep dp =  sp + (png_size_t)row_width; /* end of the widened output */
+            for (i = 1; i < row_width; i++) /* every pixel except the first */
+            {
+               *(--dp) = lo_filler;
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler; /* first pixel: its sample is already in place */
+            row_info->channels = 2;
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+         /* This changes the data from G to XG */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width;
+            png_bytep dp = sp  + (png_size_t)row_width;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+            }
+            row_info->channels = 2;
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+      }
+      else if(row_info->bit_depth == 16)
+      {
+         /* This changes the data from GG to GGXX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 1; i < row_width; i++)
+            {
+               *(--dp) = lo_filler; /* written backwards: low byte first ... */
+               *(--dp) = hi_filler; /* ... so the high byte lands at the lower address */
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler; /* first pixel: its sample is already in place */
+            *(--dp) = hi_filler;
+            row_info->channels = 2;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+         /* This changes the data from GG to XXGG */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler; /* low byte first when writing backwards */
+               *(--dp) = hi_filler;
+            }
+            row_info->channels = 2;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+      }
+   } /* COLOR_TYPE == GRAY */
+   else if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+   {
+      if(row_info->bit_depth == 8)
+      {
+         /* This changes the data from RGB to RGBX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 3;
+            png_bytep dp = sp  + (png_size_t)row_width;
+            for (i = 1; i < row_width; i++)
+            {
+               *(--dp) = lo_filler;
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler; /* first pixel: its samples are already in place */
+            row_info->channels = 4;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+         /* This changes the data from RGB to XRGB */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 3;
+            png_bytep dp = sp + (png_size_t)row_width;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+            }
+            row_info->channels = 4;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+      }
+      else if(row_info->bit_depth == 16)
+      {
+         /* This changes the data from RRGGBB to RRGGBBXX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 6;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 1; i < row_width; i++)
+            {
+               *(--dp) = lo_filler; /* low byte first when writing backwards */
+               *(--dp) = hi_filler;
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler; /* first pixel: its samples are already in place */
+            *(--dp) = hi_filler;
+            row_info->channels = 4;
+            row_info->pixel_depth = 64;
+            row_info->rowbytes = row_width * 8;
+         }
+         /* This changes the data from RRGGBB to XXRRGGBB */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 6;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler; /* low byte first when writing backwards */
+               *(--dp) = hi_filler;
+            }
+            row_info->channels = 4;
+            row_info->pixel_depth = 64;
+            row_info->rowbytes = row_width * 8;
+         }
+      }
+   } /* COLOR_TYPE == RGB */
+}
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+/* Expand 8- or 16-bit grayscale rows, with or without alpha, to RGB(A) in place */
+void /* PRIVATE */
+png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
+{
+   png_uint_32 i;
+   png_uint_32 row_width = row_info->width; /* NOTE(review): read before the optional NULL test below */
+
+   png_debug(1, "in png_do_gray_to_rgb\n");
+   if (row_info->bit_depth >= 8 &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      !(row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + (png_size_t)row_width - 1; /* last input byte */
+            png_bytep dp = sp  + (png_size_t)row_width * 2; /* last output byte */
+            for (i = 0; i < row_width; i++) /* back to front: dp never passes unread input */
+            {
+               *(dp--) = *sp;
+               *(dp--) = *sp;
+               *(dp--) = *(sp--);
+            }
+         }
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *sp; /* second byte of the sample */
+               *(dp--) = *(sp - 1); /* first byte of the sample */
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *(sp--);
+               *(dp--) = *(sp--);
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *(sp--); /* alpha is copied once */
+               *(dp--) = *sp; /* gray is written three times */
+               *(dp--) = *sp;
+               *(dp--) = *(sp--);
+            }
+         }
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 4 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *(sp--); /* two alpha bytes are copied once */
+               *(dp--) = *(sp--);
+               *(dp--) = *sp; /* two gray bytes are written three times */
+               *(dp--) = *(sp - 1);
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *(sp--);
+               *(dp--) = *(sp--);
+            }
+         }
+      }
+      row_info->channels += (png_byte)2; /* G -> RGB or GA -> RGBA */
+      row_info->color_type |= PNG_COLOR_MASK_COLOR;
+      row_info->pixel_depth = (png_byte)(row_info->channels *
+         row_info->bit_depth);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+   }
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+/* reduce RGB files to grayscale, with or without alpha
+ * using the equation given in Poynton's ColorFAQ at
+ * <http://www.inforamp.net/~poynton/>
+ * Copyright (c) 1998-01-04 Charles Poynton poynton at inforamp.net
+ *
+ *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
+ *
+ *  We approximate this with
+ *
+ *     Y = 0.21268 * R    + 0.7151 * G    + 0.07217 * B
+ *
+ *  which can be expressed with integers as
+ *
+ *     Y = (6969 * R + 23434 * G + 2365 * B)/32768
+ *
+ *  The calculation is to be done in a linear colorspace.
+ *  Other integer coefficients can be used via png_set_rgb_to_gray().
+ *  Returns 1 if any pixel in the row was not gray (R, G, B differ), else 0.
+ */
+int /* PRIVATE */
+png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
+
+{
+   png_uint_32 i;
+
+   png_uint_32 row_width = row_info->width;
+   int rgb_error = 0; /* return value: set to 1 once a non-gray pixel is seen */
+
+   png_debug(1, "in png_do_rgb_to_gray\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff; /* weights; the weighted sum */
+      png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff; /* is shifted right by 15, */
+      png_uint_32 bc = png_ptr->rgb_to_gray_blue_coeff; /* i.e. divided by 32768 */
+
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (row_info->bit_depth == 8)
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_from_1 != NULL && png_ptr->gamma_to_1 != NULL)
+            {
+               png_bytep sp = row; /* read cursor: 3 bytes per pixel */
+               png_bytep dp = row; /* write cursor: 1 byte per pixel, same buffer */
+
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte green = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte blue  = png_ptr->gamma_to_1[*(sp++)];
+                  if(red != green || red != blue) /* compared after the gamma_to_1 lookup */
+                  {
+                     rgb_error |= 1;
+                     *(dp++) = png_ptr->gamma_from_1[
+                       (rc*red+gc*green+bc*blue)>>15];
+                  }
+                  else
+                     *(dp++) = *(sp-1); /* treated as gray: store the original blue byte */
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = *(sp++);
+                  png_byte green = *(sp++);
+                  png_byte blue  = *(sp++);
+                  if(red != green || red != blue)
+                  {
+                     rgb_error |= 1;
+                     *(dp++) = (png_byte)((rc*red+gc*green+bc*blue)>>15);
+                  }
+                  else
+                     *(dp++) = *(sp-1); /* gray pixel: blue equals the gray value */
+               }
+            }
+         }
+
+         else /* RGB bit_depth == 16 */
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_16_to_1 != NULL &&
+                png_ptr->gamma_16_from_1 != NULL)
+            {
+               png_bytep sp = row; /* read cursor: 6 bytes per pixel */
+               png_bytep dp = row; /* write cursor: 2 bytes per pixel */
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, w; /* w is the gray sample to store */
+
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2; /* high byte first */
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red == green && red == blue)
+                     w = red; /* already gray: passed through without table lookups */
+                  else
+                  {
+                     /* 16-bit tables are indexed [low byte >> gamma_shift][high byte] */
+                     png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red&0xff) >>
+                                  png_ptr->gamma_shift][red>>8];
+                     png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
+                                  png_ptr->gamma_shift][green>>8];
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
+                                  png_ptr->gamma_shift][blue>>8];
+                     png_uint_16 gray16  = (png_uint_16)((rc*red_1 + gc*green_1
+                                  + bc*blue_1)>>15);
+                     w = png_ptr->gamma_16_from_1[(gray16&0xff) >>
+                         png_ptr->gamma_shift][gray16 >> 8];
+                     rgb_error |= 1;
+                  }
+
+                  *(dp++) = (png_byte)((w>>8) & 0xff);
+                  *(dp++) = (png_byte)(w & 0xff);
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, gray16;
+
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  gray16  = (png_uint_16)((rc*red + gc*green + bc*blue)>>15); /* computed for gray pixels too */
+                  *(dp++) = (png_byte)((gray16>>8) & 0xff);
+                  *(dp++) = (png_byte)(gray16 & 0xff);
+               }
+            }
+         }
+      }
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_from_1 != NULL && png_ptr->gamma_to_1 != NULL)
+            {
+               png_bytep sp = row; /* read cursor: 4 bytes per pixel */
+               png_bytep dp = row; /* write cursor: 2 bytes per pixel */
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte green = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte blue  = png_ptr->gamma_to_1[*(sp++)];
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  *(dp++) =  png_ptr->gamma_from_1
+                             [(rc*red + gc*green + bc*blue)>>15]; /* unlike plain RGB, done for gray pixels too */
+                  *(dp++) = *(sp++);  /* alpha */
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = *(sp++);
+                  png_byte green = *(sp++);
+                  png_byte blue  = *(sp++);
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  *(dp++) =  (png_byte)((rc*red + gc*green + bc*blue)>>15);
+                  *(dp++) = *(sp++);  /* alpha */
+               }
+            }
+         }
+         else /* RGBA bit_depth == 16 */
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_16_to_1 != NULL &&
+                png_ptr->gamma_16_from_1 != NULL)
+            {
+               png_bytep sp = row; /* read cursor: 8 bytes per pixel */
+               png_bytep dp = row; /* write cursor: 4 bytes per pixel */
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, w; /* w is the gray sample to store */
+
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red == green && red == blue)
+                     w = red; /* already gray: passed through without table lookups */
+                  else
+                  {
+                     /* 16-bit tables are indexed [low byte >> gamma_shift][high byte] */
+                     png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red&0xff) >>
+                                  png_ptr->gamma_shift][red>>8];
+                     png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
+                                  png_ptr->gamma_shift][green>>8];
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
+                                  png_ptr->gamma_shift][blue>>8];
+                     png_uint_16 gray16  = (png_uint_16)((rc * red_1
+                                  + gc * green_1 + bc * blue_1)>>15);
+                     w = png_ptr->gamma_16_from_1[(gray16&0xff) >>
+                         png_ptr->gamma_shift][gray16 >> 8];
+                     rgb_error |= 1;
+                  }
+
+                  *(dp++) = (png_byte)((w>>8) & 0xff);
+                  *(dp++) = (png_byte)(w & 0xff);
+                  *(dp++) = *(sp++);  /* alpha */
+                  *(dp++) = *(sp++);
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, gray16;
+                  red   = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  gray16  = (png_uint_16)((rc*red + gc*green + bc*blue)>>15);
+                  *(dp++) = (png_byte)((gray16>>8) & 0xff);
+                  *(dp++) = (png_byte)(gray16 & 0xff);
+                  *(dp++) = *(sp++);  /* alpha */
+                  *(dp++) = *(sp++);
+               }
+            }
+         }
+      }
+   row_info->channels -= (png_byte)2; /* RGB -> G or RGBA -> GA */
+      row_info->color_type &= ~PNG_COLOR_MASK_COLOR;
+      row_info->pixel_depth = (png_byte)(row_info->channels *
+         row_info->bit_depth);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+   }
+   return rgb_error;
+}
+#endif
+
+/* Build a grayscale palette.  'palette' must have room for 1 << bit_depth
+ * png_color entries; entry i gets red = green = blue = i * color_inc.
+ * This lets grayscale images be treated as paletted, which is most useful
+ * for gamma correction.  A NULL palette or any other bit depth is a no-op.
+ */
+void PNGAPI
+png_build_grayscale_palette(int bit_depth, png_colorp palette)
+{
+   int num_palette; /* number of entries to write: 1 << bit_depth */
+   int color_inc; /* step between consecutive gray levels */
+   int i;
+   int v; /* gray level of entry i */
+
+   png_debug(1, "in png_do_build_grayscale_palette\n");
+   if (palette == NULL)
+      return;
+
+   switch (bit_depth)
+   {
+      case 1:
+         num_palette = 2;
+         color_inc = 0xff; /* levels 0x00, 0xff */
+         break;
+      case 2:
+         num_palette = 4;
+         color_inc = 0x55; /* levels 0x00, 0x55, 0xaa, 0xff */
+         break;
+      case 4:
+         num_palette = 16;
+         color_inc = 0x11; /* levels 0x00, 0x11, ... 0xff */
+         break;
+      case 8:
+         num_palette = 256;
+         color_inc = 1;
+         break;
+      default: /* unsupported depth: the loop below writes nothing */
+         num_palette = 0;
+         color_inc = 0;
+         break;
+   }
+
+   for (i = 0, v = 0; i < num_palette; i++, v += color_inc)
+   {
+      palette[i].red = (png_byte)v;
+      palette[i].green = (png_byte)v;
+      palette[i].blue = (png_byte)v;
+   }
+}
+
+/* Apply the gamma and/or background transformations selected in
+ * png_ptr->transformations directly to a palette, in place.
+ *
+ *   png_ptr     - read structure supplying the transformation flags, the
+ *                 gamma lookup tables, the background color and the
+ *                 transparency information (trans / trans_values).
+ *   palette     - the entries to correct; overwritten in place.
+ *   num_palette - number of entries in "palette".
+ *
+ * Which code runs depends on the compiled-in features:
+ *   - with background, gamma and floating point support, one combined
+ *     branch handles PNG_GAMMA and/or PNG_BACKGROUND;
+ *   - otherwise a gamma-only branch and/or a background-only branch.
+ *
+ * This function is currently unused.  Do we really need it?
+ */
+#if defined(PNG_READ_DITHER_SUPPORTED) && defined(PNG_CORRECT_PALETTE_SUPPORTED)
+void /* PRIVATE */
+png_correct_palette(png_structp png_ptr, png_colorp palette,
+   int num_palette)
+{
+   png_debug(1, "in png_correct_palette\n");
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   if (png_ptr->transformations & (PNG_GAMMA | PNG_BACKGROUND))
+   {
+      /* "back" is stored into fully transparent entries; "back_1" is the
+       * background value fed to png_composite() for partially
+       * transparent entries (between gamma_to_1 and gamma_from_1).
+       */
+      png_color back, back_1;
+
+      if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
+      {
+         /* Background shares the file gamma: use the existing tables. */
+         back.red = png_ptr->gamma_table[png_ptr->background.red];
+         back.green = png_ptr->gamma_table[png_ptr->background.green];
+         back.blue = png_ptr->gamma_table[png_ptr->background.blue];
+
+         back_1.red = png_ptr->gamma_to_1[png_ptr->background.red];
+         back_1.green = png_ptr->gamma_to_1[png_ptr->background.green];
+         back_1.blue = png_ptr->gamma_to_1[png_ptr->background.blue];
+      }
+      else
+      {
+         double g;
+
+         g = 1.0 / (png_ptr->background_gamma * png_ptr->screen_gamma);
+
+         if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_SCREEN ||
+             fabs(g - 1.0) < PNG_GAMMA_THRESHOLD)
+         {
+            /* No correction needed: copy the background unchanged. */
+            back.red = png_ptr->background.red;
+            back.green = png_ptr->background.green;
+            back.blue = png_ptr->background.blue;
+         }
+         else
+         {
+            back.red =
+               (png_byte)(pow((double)png_ptr->background.red/255, g) *
+                255.0 + 0.5);
+            back.green =
+               (png_byte)(pow((double)png_ptr->background.green/255, g) *
+                255.0 + 0.5);
+            back.blue =
+               (png_byte)(pow((double)png_ptr->background.blue/255, g) *
+                255.0 + 0.5);
+         }
+
+         g = 1.0 / png_ptr->background_gamma;
+
+         back_1.red =
+            (png_byte)(pow((double)png_ptr->background.red/255, g) *
+             255.0 + 0.5);
+         back_1.green =
+            (png_byte)(pow((double)png_ptr->background.green/255, g) *
+             255.0 + 0.5);
+         back_1.blue =
+            (png_byte)(pow((double)png_ptr->background.blue/255, g) *
+             255.0 + 0.5);
+      }
+
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_uint_32 i;
+
+         for (i = 0; i < (png_uint_32)num_palette; i++)
+         {
+            if (i < png_ptr->num_trans && png_ptr->trans[i] == 0)
+            {
+               /* Fully transparent: the entry becomes the background. */
+               palette[i] = back;
+            }
+            else if (i < png_ptr->num_trans && png_ptr->trans[i] != 0xff)
+            {
+               /* Partially transparent: composite the original palette
+                * color over back_1, then map back with gamma_from_1.
+                */
+               png_byte v, w;
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].red];
+               png_composite(w, v, png_ptr->trans[i], back_1.red);
+               palette[i].red = png_ptr->gamma_from_1[w];
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].green];
+               png_composite(w, v, png_ptr->trans[i], back_1.green);
+               palette[i].green = png_ptr->gamma_from_1[w];
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].blue];
+               png_composite(w, v, png_ptr->trans[i], back_1.blue);
+               palette[i].blue = png_ptr->gamma_from_1[w];
+            }
+            else
+            {
+               /* Opaque (or no transparency entry): gamma correct only. */
+               palette[i].red = png_ptr->gamma_table[palette[i].red];
+               palette[i].green = png_ptr->gamma_table[palette[i].green];
+               palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+            }
+         }
+      }
+      else
+      {
+         int i;
+
+         for (i = 0; i < num_palette; i++)
+         {
+            /* Entries whose red value equals the transparent gray level
+             * become the background; all others are gamma corrected.
+             */
+            if (palette[i].red == (png_byte)png_ptr->trans_values.gray)
+            {
+               palette[i] = back;
+            }
+            else
+            {
+               palette[i].red = png_ptr->gamma_table[palette[i].red];
+               palette[i].green = png_ptr->gamma_table[palette[i].green];
+               palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+            }
+         }
+      }
+   }
+   else
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->transformations & PNG_GAMMA)
+   {
+      int i;
+
+      /* Gamma only: run every channel through the 8-bit table. */
+      for (i = 0; i < num_palette; i++)
+      {
+         palette[i].red = png_ptr->gamma_table[palette[i].red];
+         palette[i].green = png_ptr->gamma_table[palette[i].green];
+         palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+      }
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   else
+#endif
+#endif
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->transformations & PNG_BACKGROUND)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_color back;
+         int i;   /* was previously used here without a declaration */
+
+         back.red   = (png_byte)png_ptr->background.red;
+         back.green = (png_byte)png_ptr->background.green;
+         back.blue  = (png_byte)png_ptr->background.blue;
+
+         /* Only entries that have a transparency value can change.  The
+          * loop is also bounded by num_palette so that it never writes
+          * past the end of the caller's palette.
+          */
+         for (i = 0; i < num_palette && i < (int)png_ptr->num_trans; i++)
+         {
+            if (png_ptr->trans[i] == 0)
+            {
+               palette[i].red = back.red;
+               palette[i].green = back.green;
+               palette[i].blue = back.blue;
+            }
+            else if (png_ptr->trans[i] != 0xff)
+            {
+               png_composite(palette[i].red, png_ptr->palette[i].red,
+                  png_ptr->trans[i], back.red);
+               png_composite(palette[i].green, png_ptr->palette[i].green,
+                  png_ptr->trans[i], back.green);
+               png_composite(palette[i].blue, png_ptr->palette[i].blue,
+                  png_ptr->trans[i], back.blue);
+            }
+         }
+      }
+      else /* assume grayscale palette (what else could it be?) */
+      {
+         int i;
+
+         /* Here the palette index itself is compared with the
+          * transparent gray level.
+          */
+         for (i = 0; i < num_palette; i++)
+         {
+            if (i == (png_byte)png_ptr->trans_values.gray)
+            {
+               palette[i].red = (png_byte)png_ptr->background.red;
+               palette[i].green = (png_byte)png_ptr->background.green;
+               palette[i].blue = (png_byte)png_ptr->background.blue;
+            }
+         }
+      }
+   }
+#endif
+}
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+/* Replace any alpha or transparency with the supplied background color.
+ * "background" is already in the screen gamma, while "background_1" is
+ * at a gamma of 1.0.  Paletted files have already been taken care of.
+ *
+ *   row_info     - description of the row; when the row has an alpha
+ *                  channel it is removed and color_type, channels,
+ *                  pixel_depth and rowbytes are updated to match.
+ *   row          - the pixel data, rewritten in place.
+ *   trans_values - the transparent color that gray and RGB pixels are
+ *                  compared against.
+ *   background   - the color written for fully transparent pixels.
+ *
+ * The remaining parameters exist only when PNG_READ_GAMMA_SUPPORTED is
+ * defined.  When the relevant tables are non-NULL, pixels that are not
+ * replaced are gamma corrected here as well:
+ *   background_1                          - background used for compositing
+ *   gamma_table, gamma_from_1, gamma_to_1 - 8-bit lookup tables
+ *   gamma_16, gamma_16_from_1,
+ *   gamma_16_to_1, gamma_shift            - 16-bit lookup tables and the
+ *                                           shift applied to the low byte
+ *                                           when indexing them
+ */
+void /* PRIVATE */
+png_do_background(png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   , png_color_16p background_1,
+   png_bytep gamma_table, png_bytep gamma_from_1, png_bytep gamma_to_1,
+   png_uint_16pp gamma_16, png_uint_16pp gamma_16_from_1,
+   png_uint_16pp gamma_16_to_1, int gamma_shift
+#endif
+   )
+{
+   png_bytep sp, dp;
+   png_uint_32 i;
+   png_uint_32 row_width;
+   int shift;
+
+   png_debug(1, "in png_do_background\n");
+   if (background != NULL &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      (!(row_info->color_type & PNG_COLOR_MASK_ALPHA) ||
+      (row_info->color_type != PNG_COLOR_TYPE_PALETTE && trans_values)))
+   {
+      /* Read the width only after the (optional) NULL test on row_info. */
+      row_width = row_info->width;
+
+      switch (row_info->color_type)
+      {
+         case PNG_COLOR_TYPE_GRAY:
+         {
+            /* For the packed depths (1, 2 and 4 bits) "shift" is the bit
+             * position of the current sample inside *sp.  The mask
+             * expressions clear just that sample before the new value is
+             * OR-ed in.
+             */
+            switch (row_info->bit_depth)
+            {
+               case 1:
+               {
+                  sp = row;
+                  shift = 7;
+                  for (i = 0; i < row_width; i++)
+                  {
+                     if ((png_uint_16)((*sp >> shift) & 0x01)
+                        == trans_values->gray)
+                     {
+                        *sp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                        *sp |= (png_byte)(background->gray << shift);
+                     }
+                     if (!shift)
+                     {
+                        shift = 7;
+                        sp++;
+                     }
+                     else
+                        shift--;
+                  }
+                  break;
+               }
+               case 2:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     shift = 6;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x03)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        else
+                        {
+                           /* Replicate the 2-bit sample to 8 bits, look it
+                            * up, and keep the top two bits of the result.
+                            */
+                           png_byte p = (png_byte)((*sp >> shift) & 0x03);
+                           png_byte g = (png_byte)((gamma_table [p | (p << 2) |
+                               (p << 4) | (p << 6)] >> 6) & 0x03);
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(g << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 6;
+                           sp++;
+                        }
+                        else
+                           shift -= 2;
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     shift = 6;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x03)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 6;
+                           sp++;
+                        }
+                        else
+                           shift -= 2;
+                     }
+                  }
+                  break;
+               }
+               case 4:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     shift = 4;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x0f)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        else
+                        {
+                           /* Replicate the 4-bit sample to 8 bits, look it
+                            * up, and keep the top four bits of the result.
+                            */
+                           png_byte p = (png_byte)((*sp >> shift) & 0x0f);
+                           png_byte g = (png_byte)((gamma_table[p |
+                             (p << 4)] >> 4) & 0x0f);
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(g << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 4;
+                           sp++;
+                        }
+                        else
+                           shift -= 4;
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     shift = 4;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x0f)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 4;
+                           sp++;
+                        }
+                        else
+                           shift -= 4;
+                     }
+                  }
+                  break;
+               }
+               case 8:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp++)
+                     {
+                        if (*sp == trans_values->gray)
+                        {
+                           *sp = (png_byte)background->gray;
+                        }
+                        else
+                        {
+                           *sp = gamma_table[*sp];
+                        }
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp++)
+                     {
+                        if (*sp == trans_values->gray)
+                        {
+                           *sp = (png_byte)background->gray;
+                        }
+                     }
+                  }
+                  break;
+               }
+               case 16:
+               {
+                  /* 16-bit samples are stored high byte first. */
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_16 != NULL)
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp += 2)
+                     {
+                        png_uint_16 v;
+
+                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        if (v == trans_values->gray)
+                        {
+                           /* background is already in screen gamma */
+                           *sp = (png_byte)((background->gray >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(background->gray & 0xff);
+                        }
+                        else
+                        {
+                           v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                           *sp = (png_byte)((v >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(v & 0xff);
+                        }
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp += 2)
+                     {
+                        png_uint_16 v;
+
+                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        if (v == trans_values->gray)
+                        {
+                           *sp = (png_byte)((background->gray >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(background->gray & 0xff);
+                        }
+                     }
+                  }
+                  break;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB:
+         {
+            /* A pixel is transparent only when all three channels match
+             * trans_values.
+             */
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_table != NULL)
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 3)
+                  {
+                     if (*sp == trans_values->red &&
+                        *(sp + 1) == trans_values->green &&
+                        *(sp + 2) == trans_values->blue)
+                     {
+                        *sp = (png_byte)background->red;
+                        *(sp + 1) = (png_byte)background->green;
+                        *(sp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        *sp = gamma_table[*sp];
+                        *(sp + 1) = gamma_table[*(sp + 1)];
+                        *(sp + 2) = gamma_table[*(sp + 2)];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 3)
+                  {
+                     if (*sp == trans_values->red &&
+                        *(sp + 1) == trans_values->green &&
+                        *(sp + 2) == trans_values->blue)
+                     {
+                        *sp = (png_byte)background->red;
+                        *(sp + 1) = (png_byte)background->green;
+                        *(sp + 2) = (png_byte)background->blue;
+                     }
+                  }
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL)
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 6)
+                  {
+                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                     png_uint_16 g = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     png_uint_16 b = (png_uint_16)(((*(sp+4)) << 8) + *(sp+5));
+                     if (r == trans_values->red && g == trans_values->green &&
+                        b == trans_values->blue)
+                     {
+                        /* background is already in screen gamma */
+                        *sp = (png_byte)((background->red >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(background->red & 0xff);
+                        *(sp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(background->green & 0xff);
+                        *(sp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *sp = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        *(sp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        *(sp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 6)
+                  {
+                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp+1));
+                     png_uint_16 g = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     png_uint_16 b = (png_uint_16)(((*(sp+4)) << 8) + *(sp+5));
+
+                     if (r == trans_values->red && g == trans_values->green &&
+                        b == trans_values->blue)
+                     {
+                        *sp = (png_byte)((background->red >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(background->red & 0xff);
+                        *(sp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(background->green & 0xff);
+                        *(sp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY_ALPHA:
+         {
+            /* In the alpha cases "sp" reads pixels that still carry alpha
+             * while "dp" writes the packed result without it, so dp never
+             * runs ahead of sp.
+             */
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                   gamma_table != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 2, dp++)
+                  {
+                     png_uint_16 a = *(sp + 1);
+
+                     if (a == 0xff)
+                     {
+                        *dp = gamma_table[*sp];
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)background->gray;
+                     }
+                     else
+                     {
+                        png_byte v, w;
+
+                        v = gamma_to_1[*sp];
+                        png_composite(w, v, a, background_1->gray);
+                        *dp = gamma_from_1[w];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 2, dp++)
+                  {
+                     png_byte a = *(sp + 1);
+
+                     if (a == 0xff)
+                     {
+                        *dp = *sp;
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)background->gray;
+                     }
+                     else
+                     {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                        png_composite(*dp, *sp, a, background_1->gray);
+#else
+                        /* There is no background_1 parameter in this
+                         * configuration; composite against "background",
+                         * as the RGB_ALPHA case does.  Previously every
+                         * pixel, opaque ones included, was overwritten
+                         * with the background here.
+                         */
+                        png_composite(*dp, *sp, a, background->gray);
+#endif
+                     }
+                  }
+               }
+            }
+            else /* if (png_ptr->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                   gamma_16_to_1 != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 2)
+                  {
+                     png_uint_16 a = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_uint_16 v;
+
+                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)((background->gray >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->gray & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 g, v, w;
+
+                        g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                        png_composite_16(v, g, a, background_1->gray);
+                        w = gamma_16_from_1[(v&0xff) >> gamma_shift][v >> 8];
+                        *dp = (png_byte)((w >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(w & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 2)
+                  {
+                     png_uint_16 a = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_memcpy(dp, sp, 2);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)((background->gray >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->gray & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 g, v;
+
+                        g = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                        png_composite_16(v, g, a, background_1->gray);
+#else
+                        /* No background_1 parameter in this configuration;
+                         * composite against "background" instead of
+                         * discarding partially transparent pixels.
+                         */
+                        png_composite_16(v, g, a, background->gray);
+#endif
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                   gamma_table != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 3)
+                  {
+                     png_byte a = *(sp + 3);
+
+                     if (a == 0xff)
+                     {
+                        *dp = gamma_table[*sp];
+                        *(dp + 1) = gamma_table[*(sp + 1)];
+                        *(dp + 2) = gamma_table[*(sp + 2)];
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)background->red;
+                        *(dp + 1) = (png_byte)background->green;
+                        *(dp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        png_byte v, w;
+
+                        v = gamma_to_1[*sp];
+                        png_composite(w, v, a, background_1->red);
+                        *dp = gamma_from_1[w];
+                        v = gamma_to_1[*(sp + 1)];
+                        png_composite(w, v, a, background_1->green);
+                        *(dp + 1) = gamma_from_1[w];
+                        v = gamma_to_1[*(sp + 2)];
+                        png_composite(w, v, a, background_1->blue);
+                        *(dp + 2) = gamma_from_1[w];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 3)
+                  {
+                     png_byte a = *(sp + 3);
+
+                     if (a == 0xff)
+                     {
+                        *dp = *sp;
+                        *(dp + 1) = *(sp + 1);
+                        *(dp + 2) = *(sp + 2);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)background->red;
+                        *(dp + 1) = (png_byte)background->green;
+                        *(dp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        png_composite(*dp, *sp, a, background->red);
+                        png_composite(*(dp + 1), *(sp + 1), a,
+                           background->green);
+                        png_composite(*(dp + 2), *(sp + 2), a,
+                           background->blue);
+                     }
+                  }
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                   gamma_16_to_1 != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 8, dp += 6)
+                  {
+                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                         << 8) + (png_uint_16)(*(sp + 7)));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_uint_16 v;
+
+                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        *(dp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        *(dp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(v & 0xff);
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)((background->red >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->red & 0xff);
+                        *(dp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(background->green & 0xff);
+                        *(dp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v, w, x;
+
+                        v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                        png_composite_16(w, v, a, background_1->red);
+                        x = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
+                        *dp = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(x & 0xff);
+                        v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        png_composite_16(w, v, a, background_1->green);
+                        x = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
+                        *(dp + 2) = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(x & 0xff);
+                        v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        png_composite_16(w, v, a, background_1->blue);
+                        x = gamma_16_from_1[(w & 0xff) >> gamma_shift][w >> 8];
+                        *(dp + 4) = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(x & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 8, dp += 6)
+                  {
+                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                        << 8) + (png_uint_16)(*(sp + 7)));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_memcpy(dp, sp, 6);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)((background->red >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->red & 0xff);
+                        *(dp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(background->green & 0xff);
+                        *(dp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v;
+
+                        png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
+                            + *(sp + 3));
+                        png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
+                            + *(sp + 5));
+
+                        png_composite_16(v, r, a, background->red);
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                        png_composite_16(v, g, a, background->green);
+                        *(dp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(v & 0xff);
+                        png_composite_16(v, b, a, background->blue);
+                        *(dp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+      }
+
+      /* The alpha channel has been composited away: update the row
+       * description to the matching non-alpha color type.
+       */
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         row_info->color_type &= ~PNG_COLOR_MASK_ALPHA;
+         row_info->channels--;
+         row_info->pixel_depth = (png_byte)(row_info->channels *
+            row_info->bit_depth);
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+/* Gamma correct one row in place, avoiding the alpha channel.  Make sure
+ * you do this after you deal with the transparency issue on grayscale
+ * or RGB images.  Samples of bit depth <= 8 are mapped through gamma_table;
+ * 16-bit samples through gamma_16_table[second byte >> gamma_shift][first
+ * byte].  Build these with png_build_gamma_table().  A NULL table for the
+ * row's bit depth, or a color type without a case below, leaves the row as is. */
+void /* PRIVATE */
+png_do_gamma(png_row_infop row_info, png_bytep row,
+   png_bytep gamma_table, png_uint_16pp gamma_16_table,
+   int gamma_shift)
+{
+   png_bytep sp;   /* cursor over the row; samples are read and rewritten here */
+   png_uint_32 i;
+   png_uint_32 row_width=row_info->width;   /* number of pixels in the row */
+
+   png_debug(1, "in png_do_gamma\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       ((row_info->bit_depth <= 8 && gamma_table != NULL) ||   /* need the table for this depth */
+        (row_info->bit_depth == 16 && gamma_16_table != NULL)))
+   {
+      switch (row_info->color_type)
+      {
+         case PNG_COLOR_TYPE_RGB:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)   /* three one-byte samples per pixel */
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)   /* three two-byte samples per pixel */
+               {
+                  png_uint_16 v;
+
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);   /* high byte of the result first */
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  sp++;   /* step over the alpha byte without touching it */
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 4;   /* third sample plus the untouched two-byte alpha */
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp += 2;   /* gray byte plus the untouched alpha byte */
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 4;   /* two gray bytes plus the untouched two alpha bytes */
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY:
+         {
+            if (row_info->bit_depth == 2)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i += 4)   /* four 2-bit pixels per byte */
+               {
+                  int a = *sp & 0xc0;   /* each pixel is kept at its own bit position */
+                  int b = *sp & 0x30;
+                  int c = *sp & 0x0c;
+                  int d = *sp & 0x03;
+
+                  *sp = (png_byte)(   /* replicate each pixel to 8 bits, look up, keep the top 2 bits */
+                        ((((int)gamma_table[a|(a>>2)|(a>>4)|(a>>6)])   ) & 0xc0)|
+                        ((((int)gamma_table[(b<<2)|b|(b>>2)|(b>>4)])>>2) & 0x30)|
+                        ((((int)gamma_table[(c<<4)|(c<<2)|c|(c>>2)])>>4) & 0x0c)|
+                        ((((int)gamma_table[(d<<6)|(d<<4)|(d<<2)|d])>>6) ));
+                  sp++;
+               }
+            }
+            if (row_info->bit_depth == 4)   /* plain "if", not "else if"; depths 2 and 4 cannot both hold */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i += 2)   /* two 4-bit pixels per byte */
+               {
+                  int msb = *sp & 0xf0;
+                  int lsb = *sp & 0x0f;
+
+                  *sp = (png_byte)((((int)gamma_table[msb | (msb >> 4)]) & 0xf0)   /* nibble doubled to 8 bits, top 4 bits kept */
+                          | (((int)gamma_table[(lsb << 4) | lsb]) >> 4));
+                  sp++;
+               }
+            }
+            else if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+               }
+            }
+            else if (row_info->bit_depth == 16)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+               }
+            }
+            break;
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Expands a palette row in place to RGB, or to RGBA when trans is supplied
+ * (alpha = trans[index] for index < num_trans, otherwise 0xff).
+ */
+void /* PRIVATE */
+png_do_expand_palette(png_row_infop row_info, png_bytep row,
+   png_colorp palette, png_bytep trans, int num_trans)
+{
+   int shift, value;
+   png_bytep sp, dp;   /* sp: read cursor, dp: write cursor; both move backwards */
+   png_uint_32 i;
+   png_uint_32 row_width=row_info->width;   /* number of pixels in the row */
+
+   png_debug(1, "in png_do_expand_palette\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       row_info->color_type == PNG_COLOR_TYPE_PALETTE)   /* other color types are left alone */
+   {
+      if (row_info->bit_depth < 8)   /* step 1: unpack 1/2/4-bit indices to one byte each */
+      {
+         switch (row_info->bit_depth)
+         {
+            case 1:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 3);   /* byte holding the last pixel */
+               dp = row + (png_size_t)row_width - 1;   /* last byte of the unpacked row */
+               shift = 7 - (int)((row_width + 7) & 0x07);   /* bit position of the last pixel */
+               for (i = 0; i < row_width; i++)
+               {
+                  if ((*sp >> shift) & 0x01)
+                     *dp = 1;
+                  else
+                     *dp = 0;
+                  if (shift == 7)   /* byte used up: continue in the previous one */
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift++;
+
+                  dp--;
+               }
+               break;
+            }
+            case 2:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 2);   /* byte holding the last pixel */
+               dp = row + (png_size_t)row_width - 1;
+               shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);   /* bit position of the last pixel */
+               for (i = 0; i < row_width; i++)
+               {
+                  value = (*sp >> shift) & 0x03;
+                  *dp = (png_byte)value;
+                  if (shift == 6)   /* byte used up: continue in the previous one */
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift += 2;
+
+                  dp--;
+               }
+               break;
+            }
+            case 4:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 1);   /* byte holding the last pixel */
+               dp = row + (png_size_t)row_width - 1;
+               shift = (int)((row_width & 0x01) << 2);   /* 4 for an odd width, 0 for an even one */
+               for (i = 0; i < row_width; i++)
+               {
+                  value = (*sp >> shift) & 0x0f;
+                  *dp = (png_byte)value;
+                  if (shift == 4)   /* byte used up: continue in the previous one */
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift += 4;
+
+                  dp--;
+               }
+               break;
+            }
+         }
+         row_info->bit_depth = 8;   /* the row now holds one index per byte */
+         row_info->pixel_depth = 8;
+         row_info->rowbytes = row_width;
+      }
+      switch (row_info->bit_depth)   /* step 2: replace each index by its palette entry */
+      {
+         case 8:
+         {
+            if (trans != NULL)
+            {
+               sp = row + (png_size_t)row_width - 1;   /* last index */
+               dp = row + (png_size_t)(row_width << 2) - 1;   /* last byte of the 4-byte-per-pixel row */
+
+               for (i = 0; i < row_width; i++)   /* bytes are stored in the order A, B, G, R going backwards */
+               {
+                  if ((int)(*sp) >= num_trans)   /* index beyond the supplied trans[] entries: opaque */
+                     *dp-- = 0xff;
+                  else
+                     *dp-- = trans[*sp];
+                  *dp-- = palette[*sp].blue;
+                  *dp-- = palette[*sp].green;
+                  *dp-- = palette[*sp].red;
+                  sp--;
+               }
+               row_info->bit_depth = 8;
+               row_info->pixel_depth = 32;
+               row_info->rowbytes = row_width * 4;
+               row_info->color_type = 6;   /* PNG color type 6: RGB with alpha */
+               row_info->channels = 4;
+            }
+            else
+            {
+               sp = row + (png_size_t)row_width - 1;   /* last index */
+               dp = row + (png_size_t)(row_width * 3) - 1;   /* last byte of the 3-byte-per-pixel row */
+
+               for (i = 0; i < row_width; i++)
+               {
+                  *dp-- = palette[*sp].blue;
+                  *dp-- = palette[*sp].green;
+                  *dp-- = palette[*sp].red;
+                  sp--;
+               }
+               row_info->bit_depth = 8;
+               row_info->pixel_depth = 24;
+               row_info->rowbytes = row_width * 3;
+               row_info->color_type = 2;   /* PNG color type 2: RGB */
+               row_info->channels = 3;
+            }
+            break;
+         }
+      }
+   }
+}
+
+/* If the bit depth < 8, gray samples are expanded to 8 bits in place.  Also,
+ * if trans_value is supplied, an alpha channel is built for gray and RGB rows:
+ * 0 where the pixel equals the transparent color, all bits set elsewhere. */
+void /* PRIVATE */
+png_do_expand(png_row_infop row_info, png_bytep row,
+   png_color_16p trans_value)
+{
+   int shift, value;
+   png_bytep sp, dp;   /* sp: read cursor, dp: write cursor; both move backwards */
+   png_uint_32 i;
+   png_uint_32 row_width=row_info->width;   /* number of pixels in the row */
+
+   png_debug(1, "in png_do_expand\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         png_uint_16 gray = (png_uint_16)(trans_value ? trans_value->gray : 0);   /* transparent gray level, 0 if none */
+
+         if (row_info->bit_depth < 8)
+         {
+            switch (row_info->bit_depth)
+            {
+               case 1:
+               {
+                  gray = (png_uint_16)((gray&0x01)*0xff);   /* scale the key the same way as the samples */
+                  sp = row + (png_size_t)((row_width - 1) >> 3);   /* byte holding the last pixel */
+                  dp = row + (png_size_t)row_width - 1;   /* last byte of the expanded row */
+                  shift = 7 - (int)((row_width + 7) & 0x07);   /* bit position of the last pixel */
+                  for (i = 0; i < row_width; i++)
+                  {
+                     if ((*sp >> shift) & 0x01)
+                        *dp = 0xff;
+                     else
+                        *dp = 0;
+                     if (shift == 7)   /* byte used up: continue in the previous one */
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift++;
+
+                     dp--;
+                  }
+                  break;
+               }
+               case 2:
+               {
+                  gray = (png_uint_16)((gray&0x03)*0x55);   /* scale the key the same way as the samples */
+                  sp = row + (png_size_t)((row_width - 1) >> 2);
+                  dp = row + (png_size_t)row_width - 1;
+                  shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);   /* bit position of the last pixel */
+                  for (i = 0; i < row_width; i++)
+                  {
+                     value = (*sp >> shift) & 0x03;
+                     *dp = (png_byte)(value | (value << 2) | (value << 4) |   /* repeat the 2 bits four times */
+                        (value << 6));
+                     if (shift == 6)
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift += 2;
+
+                     dp--;
+                  }
+                  break;
+               }
+               case 4:
+               {
+                  gray = (png_uint_16)((gray&0x0f)*0x11);   /* scale the key the same way as the samples */
+                  sp = row + (png_size_t)((row_width - 1) >> 1);
+                  dp = row + (png_size_t)row_width - 1;
+                  shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);   /* 4 for an odd width, 0 for an even one */
+                  for (i = 0; i < row_width; i++)
+                  {
+                     value = (*sp >> shift) & 0x0f;
+                     *dp = (png_byte)(value | (value << 4));   /* repeat the nibble in both halves */
+                     if (shift == 4)
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift = 4;
+
+                     dp--;
+                  }
+                  break;
+               }
+            }
+            row_info->bit_depth = 8;   /* the row now holds one gray sample per byte */
+            row_info->pixel_depth = 8;
+            row_info->rowbytes = row_width;
+         }
+
+         if (trans_value != NULL)   /* append an alpha sample after every gray sample */
+         {
+            if (row_info->bit_depth == 8)
+            {
+               gray = gray & 0xff;
+               sp = row + (png_size_t)row_width - 1;   /* last gray sample */
+               dp = row + (png_size_t)(row_width << 1) - 1;   /* last byte of the 2-byte-per-pixel row */
+               for (i = 0; i < row_width; i++)
+               {
+                  if (*sp == gray)   /* alpha is written first because dp runs backwards */
+                     *dp-- = 0;
+                  else
+                     *dp-- = 0xff;
+                  *dp-- = *sp--;
+               }
+            }
+            else if (row_info->bit_depth == 16)
+            {
+               png_byte gray_high = (gray >> 8) & 0xff;
+               png_byte gray_low = gray & 0xff;
+               sp = row + row_info->rowbytes - 1;   /* second byte of the last sample */
+               dp = row + (row_info->rowbytes << 1) - 1;   /* last byte of the doubled row */
+               for (i = 0; i < row_width; i++)
+               {
+                  if (*(sp-1) == gray_high && *(sp) == gray_low) /* both bytes match the transparent gray */
+                  {
+                     *dp-- = 0;
+                     *dp-- = 0;
+                  }
+                  else
+                  {
+                     *dp-- = 0xff;
+                     *dp-- = 0xff;
+                  }
+                  *dp-- = *sp--;
+                  *dp-- = *sp--;
+               }
+            }
+            row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
+            row_info->channels = 2;
+            row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1);
+            row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+               row_width);
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_RGB && trans_value)   /* RGB rows only gain alpha; no depth change */
+      {
+         if (row_info->bit_depth == 8)
+         {
+            png_byte red = trans_value->red & 0xff;
+            png_byte green = trans_value->green & 0xff;
+            png_byte blue = trans_value->blue & 0xff;
+            sp = row + (png_size_t)row_info->rowbytes - 1;   /* last (blue) byte of the last pixel */
+            dp = row + (png_size_t)(row_width << 2) - 1;   /* last byte of the 4-byte-per-pixel row */
+            for (i = 0; i < row_width; i++)
+            {
+               if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)   /* compare before the pixel is copied */
+                  *dp-- = 0;
+               else
+                  *dp-- = 0xff;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+            }
+         }
+         else if (row_info->bit_depth == 16)
+         {
+            png_byte red_high = (trans_value->red >> 8) & 0xff;
+            png_byte green_high = (trans_value->green >> 8) & 0xff;
+            png_byte blue_high = (trans_value->blue >> 8) & 0xff;
+            png_byte red_low = trans_value->red & 0xff;
+            png_byte green_low = trans_value->green & 0xff;
+            png_byte blue_low = trans_value->blue & 0xff;
+            sp = row + row_info->rowbytes - 1;   /* last byte of the last pixel */
+            dp = row + (png_size_t)(row_width << 3) - 1;   /* last byte of the 8-byte-per-pixel row */
+            for (i = 0; i < row_width; i++)
+            {
+               if (*(sp - 5) == red_high &&   /* all six bytes must match the transparent color */
+                  *(sp - 4) == red_low &&
+                  *(sp - 3) == green_high &&
+                  *(sp - 2) == green_low &&
+                  *(sp - 1) == blue_high &&
+                  *(sp    ) == blue_low)
+               {
+                  *dp-- = 0;
+                  *dp-- = 0;
+               }
+               else
+               {
+                  *dp-- = 0xff;
+                  *dp-- = 0xff;
+               }
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+            }
+         }
+         row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+         row_info->channels = 4;
+         row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2);
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+void /* PRIVATE */
+png_do_dither(png_row_infop row_info, png_bytep row,
+    png_bytep palette_lookup, png_bytep dither_lookup)
+{  /* In place: 8-bit RGB/RGBA pixels become palette_lookup[] indices; 8-bit palette indices are remapped via dither_lookup[] */
+   png_bytep sp, dp;   /* sp: read cursor, dp: write cursor (dp never runs ahead of sp) */
+   png_uint_32 i;
+   png_uint_32 row_width=row_info->width;   /* number of pixels in the row */
+
+   png_debug(1, "in png_do_dither\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB &&
+         palette_lookup && row_info->bit_depth == 8)
+      {
+         int r, g, b, p;   /* p: index into palette_lookup built from the top bits of r, g, b */
+         sp = row;
+         dp = row;
+         for (i = 0; i < row_width; i++)   /* three bytes in, one byte out per pixel */
+         {
+            r = *sp++;
+            g = *sp++;
+            b = *sp++;
+
+            /* this looks real messy, but the compiler will reduce
+               it down to a reasonable formula.  For example, with
+               5 bits per color, we get:
+               p = (((r >> 3) & 0x1f) << 10) |
+                  (((g >> 3) & 0x1f) << 5) |
+                  ((b >> 3) & 0x1f);
+               */
+            p = (((r >> (8 - PNG_DITHER_RED_BITS)) &
+               ((1 << PNG_DITHER_RED_BITS) - 1)) <<
+               (PNG_DITHER_GREEN_BITS + PNG_DITHER_BLUE_BITS)) |
+               (((g >> (8 - PNG_DITHER_GREEN_BITS)) &
+               ((1 << PNG_DITHER_GREEN_BITS) - 1)) <<
+               (PNG_DITHER_BLUE_BITS)) |
+               ((b >> (8 - PNG_DITHER_BLUE_BITS)) &
+               ((1 << PNG_DITHER_BLUE_BITS) - 1));
+
+            *dp++ = palette_lookup[p];
+         }
+         row_info->color_type = PNG_COLOR_TYPE_PALETTE;   /* the row is now one index byte per pixel */
+         row_info->channels = 1;
+         row_info->pixel_depth = row_info->bit_depth;
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
+         palette_lookup != NULL && row_info->bit_depth == 8)
+      {
+         int r, g, b, p;
+         sp = row;
+         dp = row;
+         for (i = 0; i < row_width; i++)   /* four bytes in, one byte out per pixel */
+         {
+            r = *sp++;
+            g = *sp++;
+            b = *sp++;
+            sp++;   /* the alpha byte is discarded */
+
+            p = (((r >> (8 - PNG_DITHER_RED_BITS)) &   /* same index formula as the RGB branch */
+               ((1 << PNG_DITHER_RED_BITS) - 1)) <<
+               (PNG_DITHER_GREEN_BITS + PNG_DITHER_BLUE_BITS)) |
+               (((g >> (8 - PNG_DITHER_GREEN_BITS)) &
+               ((1 << PNG_DITHER_GREEN_BITS) - 1)) <<
+               (PNG_DITHER_BLUE_BITS)) |
+               ((b >> (8 - PNG_DITHER_BLUE_BITS)) &
+               ((1 << PNG_DITHER_BLUE_BITS) - 1));
+
+            *dp++ = palette_lookup[p];
+         }
+         row_info->color_type = PNG_COLOR_TYPE_PALETTE;
+         row_info->channels = 1;
+         row_info->pixel_depth = row_info->bit_depth;
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
+         dither_lookup && row_info->bit_depth == 8)
+      {
+         sp = row;
+         for (i = 0; i < row_width; i++, sp++)   /* one-for-one remap; row_info is unchanged */
+         {
+            *sp = dither_lookup[*sp];
+         }
+      }
+   }
+}
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+static PNG_CONST int png_gamma_shift[] =   /* indexed by shift; (i * png_gamma_shift[shift]) >> 4 gives the low input byte for table row i */
+   {0x10, 0x21, 0x42, 0x84, 0x110, 0x248, 0x550, 0xff0, 0x00};
+
+/* We build the 8- or 16-bit gamma tables here and store them in png_ptr.
+ * Note that for 16-bit tables, we don't make a full table if we are
+ * reducing to 8-bit in the future.  Note also how the gamma_16 tables are
+ * segmented into num rows of 256 entries so that we don't need to allocate
+ * > 64K chunks for a full 16-bit table. */
+void /* PRIVATE */
+png_build_gamma_table(png_structp png_ptr)
+{
+  png_debug(1, "in png_build_gamma_table\n");
+
+  if (png_ptr->bit_depth <= 8)   /* 8-bit case: flat 256-entry byte tables */
+  {
+     int i;
+     double g;   /* exponent applied to the normalized sample value */
+
+     if (png_ptr->screen_gamma > .000001)
+        g = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+     else
+        g = 1.0;   /* no usable screen gamma: identity table */
+
+     png_ptr->gamma_table = (png_bytep)png_malloc(png_ptr,
+        (png_uint_32)256);
+
+     for (i = 0; i < 256; i++)
+     {
+        png_ptr->gamma_table[i] = (png_byte)(pow((double)i / 255.0,   /* scale to 0..1, raise to g, scale back, round */
+           g) * 255.0 + .5);
+     }
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+     if (png_ptr->transformations & ((PNG_BACKGROUND) | PNG_RGB_TO_GRAY))   /* these also need the to_1/from_1 tables */
+     {
+
+        g = 1.0 / (png_ptr->gamma);   /* exponent for gamma_to_1 */
+
+        png_ptr->gamma_to_1 = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)256);
+
+        for (i = 0; i < 256; i++)
+        {
+           png_ptr->gamma_to_1[i] = (png_byte)(pow((double)i / 255.0,
+              g) * 255.0 + .5);
+        }
+
+
+        png_ptr->gamma_from_1 = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)256);
+
+        if(png_ptr->screen_gamma > 0.000001)   /* exponent for gamma_from_1 */
+           g = 1.0 / png_ptr->screen_gamma;
+        else
+           g = png_ptr->gamma;   /* probably doing rgb_to_gray */
+
+        for (i = 0; i < 256; i++)
+        {
+           png_ptr->gamma_from_1[i] = (png_byte)(pow((double)i / 255.0,
+              g) * 255.0 + .5);
+
+        }
+     }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_RGB_TO_GRAY_SUPPORTED */
+  }
+  else   /* bit depth above 8: segmented 16-bit tables */
+  {
+     double g;
+     int i, j, shift, num;   /* shift: low-order bits dropped from the row index; num: number of table rows */
+     int sig_bit;
+     png_uint_32 ig;
+
+     if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
+     {
+        sig_bit = (int)png_ptr->sig_bit.red;   /* take the largest of the three color sig_bit values */
+        if ((int)png_ptr->sig_bit.green > sig_bit)
+           sig_bit = png_ptr->sig_bit.green;
+        if ((int)png_ptr->sig_bit.blue > sig_bit)
+           sig_bit = png_ptr->sig_bit.blue;
+     }
+     else
+     {
+        sig_bit = (int)png_ptr->sig_bit.gray;
+     }
+
+     if (sig_bit > 0)
+        shift = 16 - sig_bit;
+     else
+        shift = 0;
+
+     if (png_ptr->transformations & PNG_16_TO_8)
+     {
+        if (shift < (16 - PNG_MAX_GAMMA_8))   /* output will be cut to 8 bits: enforce a minimum shift */
+           shift = (16 - PNG_MAX_GAMMA_8);
+     }
+
+     if (shift > 8)   /* clamp shift to the range 0..8 */
+        shift = 8;
+     if (shift < 0)
+        shift = 0;
+
+     png_ptr->gamma_shift = (png_byte)shift;
+
+     num = (1 << (8 - shift));   /* one table row per distinct (low byte >> shift) value */
+
+     if (png_ptr->screen_gamma > .000001)
+        g = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+     else
+        g = 1.0;
+
+     png_ptr->gamma_16_table = (png_uint_16pp)png_malloc(png_ptr,   /* array of num row pointers */
+        (png_uint_32)(num * png_sizeof (png_uint_16p)));
+
+     if (png_ptr->transformations & (PNG_16_TO_8 | PNG_BACKGROUND))
+     {
+        double fin, fout;
+        png_uint_32 last, max;   /* last: next input value to fill; max: last input mapped to level i */
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_table[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+        }
+
+        g = 1.0 / g;   /* inverted exponent: the fill below steps through the 256 output levels */
+        last = 0;
+        for (i = 0; i < 256; i++)
+        {
+           fout = ((double)i + 0.5) / 256.0;
+           fin = pow(fout, g);
+           max = (png_uint_32)(fin * (double)((png_uint_32)num << 8));
+           while (last <= max)   /* every input up to max gets output i repeated in both bytes */
+           {
+              png_ptr->gamma_16_table[(int)(last & (0xff >> shift))]
+                 [(int)(last >> (8 - shift))] = (png_uint_16)(
+                 (png_uint_16)i | ((png_uint_16)i << 8));
+              last++;
+           }
+        }
+        while (last < ((png_uint_32)num << 8))   /* any remaining inputs map to the maximum value */
+        {
+           png_ptr->gamma_16_table[(int)(last & (0xff >> shift))]
+              [(int)(last >> (8 - shift))] = (png_uint_16)65535L;
+           last++;
+        }
+     }
+     else   /* direct evaluation of every table entry */
+     {
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_table[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           ig = (((png_uint_32)i * (png_uint_32)png_gamma_shift[shift]) >> 4);   /* low byte represented by row i */
+           for (j = 0; j < 256; j++)   /* j is the high byte of the input value */
+           {
+              png_ptr->gamma_16_table[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+     }
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+     if (png_ptr->transformations & (PNG_BACKGROUND | PNG_RGB_TO_GRAY))   /* 16-bit to_1/from_1 tables, same layout */
+     {
+
+        g = 1.0 / (png_ptr->gamma);   /* exponent for gamma_16_to_1 */
+
+        png_ptr->gamma_16_to_1 = (png_uint_16pp)png_malloc(png_ptr,
+           (png_uint_32)(num * png_sizeof (png_uint_16p )));
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_to_1[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           ig = (((png_uint_32)i *
+              (png_uint_32)png_gamma_shift[shift]) >> 4);
+           for (j = 0; j < 256; j++)
+           {
+              png_ptr->gamma_16_to_1[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+
+        if(png_ptr->screen_gamma > 0.000001)   /* exponent for gamma_16_from_1 */
+           g = 1.0 / png_ptr->screen_gamma;
+        else
+           g = png_ptr->gamma;   /* probably doing rgb_to_gray */
+
+        png_ptr->gamma_16_from_1 = (png_uint_16pp)png_malloc(png_ptr,
+           (png_uint_32)(num * png_sizeof (png_uint_16p)));
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_from_1[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           ig = (((png_uint_32)i *
+              (png_uint_32)png_gamma_shift[shift]) >> 4);
+           for (j = 0; j < 256; j++)
+           {
+              png_ptr->gamma_16_from_1[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+     }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_RGB_TO_GRAY_SUPPORTED */
+  }
+}
+#endif
+/* To do: install integer version of png_build_gamma_table here */
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* Undoes intrapixel differencing in place: for 8- and 16-bit RGB/RGBA rows the second (green) sample is added back into the first and third, modulo the sample range. */
+void /* PRIVATE */
+png_do_read_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_read_intrapixel\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       (row_info->color_type & PNG_COLOR_MASK_COLOR))   /* rows without the color bit are left alone */
+   {
+      int bytes_per_pixel;   /* stride from one pixel to the next */
+      png_uint_32 row_width = row_info->width;
+      if (row_info->bit_depth == 8)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 3;
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 4;
+         else
+            return;   /* any other color type is not handled */
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            *(rp) = (png_byte)((256 + *rp + *(rp+1))&0xff);   /* first sample += second, modulo 256 */
+            *(rp+2) = (png_byte)((256 + *(rp+2) + *(rp+1))&0xff);   /* third sample += second, modulo 256 */
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 6;
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 8;
+         else
+            return;   /* any other color type is not handled */
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            png_uint_32 s0   = (*(rp  ) << 8) | *(rp+1);   /* assemble the three two-byte samples, high byte first */
+            png_uint_32 s1   = (*(rp+2) << 8) | *(rp+3);
+            png_uint_32 s2   = (*(rp+4) << 8) | *(rp+5);
+            png_uint_32 red  = (png_uint_32)((s0+s1+65536L) & 0xffffL);   /* first += second, modulo 65536 */
+            png_uint_32 blue = (png_uint_32)((s2+s1+65536L) & 0xffffL);   /* third += second, modulo 65536 */
+            *(rp  ) = (png_byte)((red >> 8) & 0xff);
+            *(rp+1) = (png_byte)(red & 0xff);
+            *(rp+4) = (png_byte)((blue >> 8) & 0xff);
+            *(rp+5) = (png_byte)(blue & 0xff);
+         }
+      }
+   }
+}
+#endif /* PNG_MNG_FEATURES_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/PNG/pngrutil.c b/PNG/pngrutil.c
new file mode 100644
index 0000000..531cb05
--- /dev/null
+++ b/PNG/pngrutil.c
@@ -0,0 +1,3183 @@
+
+/* pngrutil.c - utilities to read a PNG file
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains routines that are only called from within
+ * libpng itself during the course of reading an image.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+#if defined(_WIN32_WCE) && (_WIN32_WCE<0x500)
+#  define WIN32_WCE_OLD
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+#  if defined(WIN32_WCE_OLD)
+/* strtod() function is not supported on WindowsCE */
+__inline double png_strtod(png_structp png_ptr, PNG_CONST char *nptr, char **endptr)
+{
+   /* Parse through wcstod() on a wide copy, then map the stop point back */
+   wchar_t *wide, *wide_stop;
+   double value;
+   int count = MultiByteToWideChar(CP_ACP, 0, nptr, -1, NULL, 0);
+
+   wide = (wchar_t *)png_malloc(png_ptr, count * sizeof(wchar_t));
+   if (wide == NULL)
+      return 0;   /* *endptr is not written on this path */
+
+   MultiByteToWideChar(CP_ACP, 0, nptr, -1, wide, count);
+   value = wcstod(wide, &wide_stop);
+   count = WideCharToMultiByte(CP_ACP, 0, wide_stop, -1, NULL, 0, NULL, NULL);
+   *endptr = (char *)nptr + (png_strlen(nptr) - count + 1);
+   png_free(png_ptr, wide);
+   return value;
+}
+#  else
+#    define png_strtod(p,a,b) strtod(a,b)
+#  endif
+#endif
+
+png_uint_32 PNGAPI
+png_get_uint_31(png_structp png_ptr, png_bytep buf)
+{
+   png_uint_32 value = png_get_uint_32(buf);   /* 4 bytes, big-endian */
+   if (!(value <= PNG_UINT_31_MAX))            /* above the 31-bit cap */
+     png_error(png_ptr, "PNG unsigned integer out of range.");
+   return value;
+}
+#ifndef PNG_READ_BIG_ENDIAN_SUPPORTED
+/* Assemble four big-endian bytes from buf into an unsigned 32-bit value. */
+png_uint_32 PNGAPI
+png_get_uint_32(png_bytep buf)
+{
+   png_uint_32 value = (png_uint_32)buf[0];
+
+   value = (value << 8) | (png_uint_32)buf[1];
+   value = (value << 8) | (png_uint_32)buf[2];
+   value = (value << 8) | (png_uint_32)buf[3];
+   return value;
+}
+
+/* Grab a signed 32-bit integer from a buffer in big-endian format.  The
+ * bytes are two's complement; decoding goes through an unsigned value so no
+ * shift touches a sign bit and the result is right whatever the int width. */
+png_int_32 PNGAPI
+png_get_int_32(png_bytep buf)
+{
+   png_uint_32 bits = png_get_uint_32(buf) & 0xffffffffUL;
+
+   if ((bits & 0x80000000UL) == 0)       /* sign bit clear: value is >= 0 */
+      return ((png_int_32)bits);
+   bits = (bits ^ 0xffffffffUL) + 1;      /* magnitude, 1 .. 0x80000000 */
+   return (-(png_int_32)(bits - 1) - 1);  /* avoids negating 0x80000000 */
+}
+
+/* Combine two big-endian bytes from buf into an unsigned 16-bit value. */
+png_uint_16 PNGAPI
+png_get_uint_16(png_bytep buf)
+{
+   png_uint_16 high = (png_uint_16)buf[0];
+   png_uint_16 low = (png_uint_16)buf[1];
+
+   return ((png_uint_16)((high << 8) + low));
+}
+#endif /* PNG_READ_BIG_ENDIAN_SUPPORTED */
+
+/* Read `length' bytes into buf, and (optionally) run them through the CRC. */
+void /* PRIVATE */
+png_crc_read(png_structp png_ptr, png_bytep buf, png_size_t length)
+{
+   if (!png_ptr) return;   /* no read struct: nothing to do */
+   png_read_data(png_ptr, buf, length);
+   png_calculate_crc(png_ptr, buf, length);
+}
+
+/* Optionally skip data and then check the CRC.  Depending on whether we
+   are reading an ancillary or critical chunk, and how the program has set
+   things up, a CRC mismatch produces either a warning or an error.
+   Returns '1' if there was a CRC error, '0' otherwise. */
+int /* PRIVATE */
+png_crc_finish(png_structp png_ptr, png_uint_32 skip)
+{
+   png_size_t remaining = (png_size_t)skip;
+   png_size_t step = png_ptr->zbuf_size;
+   int ancillary, tolerate;
+
+   /* Drain the skipped bytes through zbuf, one buffer-load at a time */
+   while (remaining > step)
+   {
+      png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+      remaining -= step;
+   }
+   if (remaining != 0)
+      png_crc_read(png_ptr, png_ptr->zbuf, remaining);
+
+   if (!png_crc_error(png_ptr))
+      return (0);
+
+   /* Bit 0x20 of the first chunk-name byte marks an ancillary chunk */
+   ancillary = (png_ptr->chunk_name[0] & 0x20) != 0;
+   if (ancillary)
+      tolerate = !(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN);
+   else
+      tolerate = (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE) != 0;
+
+   if (tolerate)
+      png_chunk_warning(png_ptr, "CRC error");
+   else
+      png_chunk_error(png_ptr, "CRC error");
+
+   return (1);
+}
+
+/* Compare the CRC stored in the PNG file with that calculated by libpng from
+   the data it has read thus far.  Returns nonzero only for a mismatch that
+   the CRC flags say must be checked. */
+int /* PRIVATE */
+png_crc_error(png_structp png_ptr)
+{
+   png_byte stored[4];
+   png_uint_32 file_crc;
+   int check_crc;
+
+   if (png_ptr->chunk_name[0] & 0x20)                     /* ancillary */
+   {
+      /* USE combined with NOWARN is the one setting that skips the test */
+      check_crc = ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) !=
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN));
+   }
+   else                                                    /* critical */
+   {
+      check_crc = !(png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE);
+   }
+
+   /* The four CRC bytes are consumed from the stream in every case */
+   png_read_data(png_ptr, stored, 4);
+
+   if (!check_crc)
+      return (0);
+
+   /* stored value is big-endian; png_ptr->crc is the running result */
+   file_crc = png_get_uint_32(stored);
+   return ((int)(file_crc != png_ptr->crc));
+}
+
+#if defined(PNG_READ_zTXt_SUPPORTED) || defined(PNG_READ_iTXt_SUPPORTED) || \
+    defined(PNG_READ_iCCP_SUPPORTED)
+/*
+ * Decompress trailing data in a chunk.  chunkdata is an allocated area of
+ * chunklength bytes: prefix_size bytes to keep, then the compressed part.
+ * For PNG_COMPRESSION_TYPE_BASE a new area (prefix, inflated data, NUL) is
+ * returned and chunkdata freed; a failed decode keeps just the prefix.  Other
+ * types warn and return chunkdata cut at the prefix.  *newlength excludes NUL.
+ */
+png_charp /* PRIVATE */
+png_decompress_chunk(png_structp png_ptr, int comp_type,
+                              png_charp chunkdata, png_size_t chunklength,
+                              png_size_t prefix_size, png_size_t *newlength)
+{
+   static PNG_CONST char msg[] = "Error decoding compressed text";
+   png_charp text;
+   png_size_t text_size;
+
+   if (comp_type == PNG_COMPRESSION_TYPE_BASE)
+   {
+      int ret = Z_OK;
+      png_ptr->zstream.next_in = (png_bytep)(chunkdata + prefix_size);
+      png_ptr->zstream.avail_in = (uInt)(chunklength - prefix_size);
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+      text_size = 0;
+      text = NULL;
+
+      while (png_ptr->zstream.avail_in)   /* until all input is consumed */
+      {
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret != Z_OK && ret != Z_STREAM_END)
+         {
+            if (png_ptr->zstream.msg != NULL)
+               png_warning(png_ptr, png_ptr->zstream.msg);
+            else
+               png_warning(png_ptr, msg);
+            inflateReset(&png_ptr->zstream);
+            png_ptr->zstream.avail_in = 0;
+
+            if (text ==  NULL)
+            {
+               text_size = prefix_size + png_sizeof(msg) + 1;
+               text = (png_charp)png_malloc_warn(png_ptr, text_size);
+               if (text ==  NULL)
+                 {
+                    png_free(png_ptr,chunkdata);
+                    png_error(png_ptr,"Not enough memory to decompress chunk");
+                 }
+               png_memcpy(text, chunkdata, prefix_size);
+            }
+
+            text[text_size - 1] = 0x00;
+            /* Copy what we can of the error message into the text chunk; the
+               room left is what text_size still allows after the prefix. */
+            text_size = text_size - prefix_size;
+            text_size = png_sizeof(msg) > text_size ? text_size :
+               png_sizeof(msg);
+            png_memcpy(text + prefix_size, msg, text_size);
+            break;
+         }
+         if (!png_ptr->zstream.avail_out || ret == Z_STREAM_END)
+         {
+            if (text == NULL)   /* first batch of output: start the result */
+            {
+               text_size = prefix_size +
+                   png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               text = (png_charp)png_malloc_warn(png_ptr, text_size + 1);
+               if (text ==  NULL)
+                 {
+                    png_free(png_ptr,chunkdata);
+                    png_error(png_ptr,"Not enough memory to decompress chunk.");
+                 }
+               png_memcpy(text + prefix_size, png_ptr->zbuf,
+                    text_size - prefix_size);
+               png_memcpy(text, chunkdata, prefix_size);
+               *(text + text_size) = 0x00;
+            }
+            else   /* later batches: grow by copying into a larger area */
+            {
+               png_charp tmp;
+
+               tmp = text;
+               text = (png_charp)png_malloc_warn(png_ptr,
+                  (png_uint_32)(text_size +
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out + 1));
+               if (text == NULL)
+               {
+                  png_free(png_ptr, tmp);
+                  png_free(png_ptr, chunkdata);
+                  png_error(png_ptr,"Not enough memory to decompress chunk..");
+               }
+               png_memcpy(text, tmp, text_size);
+               png_free(png_ptr, tmp);
+               png_memcpy(text + text_size, png_ptr->zbuf,
+                  (png_ptr->zbuf_size - png_ptr->zstream.avail_out));
+               text_size += png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               *(text + text_size) = 0x00;
+            }
+            if (ret == Z_STREAM_END)
+               break;
+            else
+            {
+               png_ptr->zstream.next_out = png_ptr->zbuf;
+               png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            }
+         }
+      }
+      if (ret != Z_STREAM_END)
+      {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+         char umsg[52];
+
+         if (ret == Z_BUF_ERROR)
+            png_snprintf(umsg, 52,
+                "Buffer error in compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         else if (ret == Z_DATA_ERROR)
+            png_snprintf(umsg, 52,
+                "Data error in compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         else
+            png_snprintf(umsg, 52,
+                "Incomplete compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         png_warning(png_ptr, umsg);
+#else
+         png_warning(png_ptr,
+            "Incomplete compressed datastream in chunk other than IDAT");
+#endif
+         text_size=prefix_size;   /* the result is cut back to the prefix */
+         if (text ==  NULL)
+         {
+            text = (png_charp)png_malloc_warn(png_ptr, text_size+1);
+            if (text == NULL)
+              {
+                png_free(png_ptr, chunkdata);
+                png_error(png_ptr,"Not enough memory for text.");
+              }
+            png_memcpy(text, chunkdata, prefix_size);
+         }
+         *(text + text_size) = 0x00;
+      }
+
+      inflateReset(&png_ptr->zstream);
+      png_ptr->zstream.avail_in = 0;
+
+      png_free(png_ptr, chunkdata);
+      chunkdata = text;
+      *newlength=text_size;
+   }
+   else /* if (comp_type != PNG_COMPRESSION_TYPE_BASE) */
+   {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char umsg[50];
+
+      png_snprintf(umsg, 50,
+         "Unknown zTXt compression type %d", comp_type);
+      png_warning(png_ptr, umsg);
+#else
+      png_warning(png_ptr, "Unknown zTXt compression type");
+#endif
+
+      *(chunkdata + prefix_size) = 0x00;
+      *newlength=prefix_size;
+   }
+
+   return chunkdata;
+}
+#endif
+
+/* Read and check the IHDR chunk, then record the image header fields. */
+void /* PRIVATE */
+png_handle_IHDR(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte header[13];
+   png_uint_32 width, height;
+   int bit_depth, color_type, interlace_type;
+   int compression_type, filter_type;
+
+   png_debug(1, "in png_handle_IHDR\n");
+
+   if (png_ptr->mode & PNG_HAVE_IHDR)
+      png_error(png_ptr, "Out of place IHDR");
+
+   /* a valid IHDR body is exactly 13 bytes long */
+   if (length != 13)
+      png_error(png_ptr, "Invalid IHDR chunk");
+
+   /* mark the header as seen before its bytes are consumed */
+   png_ptr->mode |= PNG_HAVE_IHDR;
+
+   png_crc_read(png_ptr, header, 13);
+   png_crc_finish(png_ptr, 0);
+
+   /* layout: width(4) height(4) depth color compression filter interlace */
+   width = png_get_uint_31(png_ptr, header);
+   height = png_get_uint_31(png_ptr, header + 4);
+   bit_depth = header[8];
+   color_type = header[9];
+   compression_type = header[10];
+   filter_type = header[11];
+   interlace_type = header[12];
+
+   /* mirror the header into the read struct */
+   png_ptr->width = width;
+   png_ptr->height = height;
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->color_type = (png_byte)color_type;
+   png_ptr->compression_type = (png_byte)compression_type;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->interlaced = (png_byte)interlace_type;
+
+   /* Derive the channel count from the color type.  Any value not listed
+    * here leaves png_ptr->channels as it was.
+    */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      png_ptr->channels = 4;
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      png_ptr->channels = 3;
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      png_ptr->channels = 2;
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY ||
+            png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      png_ptr->channels = 1;
+
+   /* bits per pixel and bytes per row follow from depth and channels */
+   png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth *
+      png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,png_ptr->width);
+
+   png_debug1(3,"bit_depth = %d\n", png_ptr->bit_depth);
+   png_debug1(3,"channels = %d\n", png_ptr->channels);
+   png_debug1(3,"rowbytes = %lu\n", png_ptr->rowbytes);
+
+   /* pass the same values on through png_set_IHDR() */
+   png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth,
+      color_type, interlace_type, compression_type, filter_type);
+}
+
+/* Read and check the PLTE chunk, then pass the palette to png_set_PLTE(). */
+void /* PRIVATE */
+png_handle_PLTE(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_color palette[PNG_MAX_PALETTE_LENGTH];   /* entries decoded below */
+   int num, i;
+#ifndef PNG_NO_POINTER_INDEXING
+   png_colorp pal_ptr;
+#endif
+
+   png_debug(1, "in png_handle_PLTE\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before PLTE");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid PLTE after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      png_error(png_ptr, "Duplicate PLTE chunk");
+
+   png_ptr->mode |= PNG_HAVE_PLTE;
+
+   if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR))
+   {
+      png_warning(png_ptr,
+        "Ignoring PLTE chunk in grayscale PNG");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      png_crc_finish(png_ptr, length);   /* skipped without any warning */
+      return;
+   }
+#endif
+
+   /* each entry is 3 bytes, so the length must be a bounded multiple of 3 */
+   if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3)
+   {
+      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+      {
+         png_warning(png_ptr, "Invalid palette chunk");
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+      else
+      {
+         png_error(png_ptr, "Invalid palette chunk");
+      }
+   }
+
+   num = (int)length / 3;   /* number of palette entries in the chunk */
+
+#ifndef PNG_NO_POINTER_INDEXING
+   for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++)
+   {
+      png_byte buf[3];
+
+      png_crc_read(png_ptr, buf, 3);
+      pal_ptr->red = buf[0];
+      pal_ptr->green = buf[1];
+      pal_ptr->blue = buf[2];
+   }
+#else
+   for (i = 0; i < num; i++)
+   {
+      png_byte buf[3];
+
+      png_crc_read(png_ptr, buf, 3);
+      /* don't depend upon png_color being any order */
+      palette[i].red = buf[0];
+      palette[i].green = buf[1];
+      palette[i].blue = buf[2];
+   }
+#endif
+
+   /* If we actually NEED the PLTE chunk (ie for a paletted image), we do
+      whatever the normal CRC configuration tells us.  However, if we
+      have an RGB image, the PLTE can be considered ancillary, so
+      we will act as though it is. */
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+#endif
+   {
+      png_crc_finish(png_ptr, 0);
+   }
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   else if (png_crc_error(png_ptr))  /* Only if we have a CRC error */
+   {
+      /* If we don't want to use the data from an ancillary chunk,
+         we have two options: an error abort, or a warning and we
+         ignore the data in this chunk (which should be OK, since
+         it's considered ancillary for a RGB or RGBA image). */
+      if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE))
+      {
+         if (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN)
+         {
+            png_chunk_error(png_ptr, "CRC error");
+         }
+         else
+         {
+            png_chunk_warning(png_ptr, "CRC error");
+            return;
+         }
+      }
+      /* Otherwise, we (optionally) emit a warning and use the chunk. */
+      else if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN))
+      {
+         png_chunk_warning(png_ptr, "CRC error");
+      }
+   }
+#endif
+
+   png_set_PLTE(png_ptr, info_ptr, palette, num);
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
+      {
+         /* clamp both tRNS counts so neither exceeds the palette size */
+         if (png_ptr->num_trans > (png_uint_16)num)
+         {
+            png_warning(png_ptr, "Truncating incorrect tRNS chunk length");
+            png_ptr->num_trans = (png_uint_16)num;
+         }
+         if (info_ptr->num_trans > (png_uint_16)num)
+         {
+            png_warning(png_ptr, "Truncating incorrect info tRNS chunk length");
+            info_ptr->num_trans = (png_uint_16)num;
+         }
+      }
+   }
+#endif
+
+}
+
+void /* PRIVATE */
+png_handle_IEND(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_debug(1, "in png_handle_IEND\n");
+
+   /* IEND is only acceptable once both IHDR and IDAT have been seen */
+   if (!((png_ptr->mode & PNG_HAVE_IHDR) &&
+         (png_ptr->mode & PNG_HAVE_IDAT)))
+      png_error(png_ptr, "No image in file");
+
+   png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND);
+
+   /* a non-empty IEND is tolerated: warn, then skip whatever it holds */
+   if (length != 0)
+      png_warning(png_ptr, "Incorrect IEND chunk length");
+
+   png_crc_finish(png_ptr, length);
+
+   info_ptr = info_ptr; /* quiet compiler warnings about unused info_ptr */
+}
+
+#if defined(PNG_READ_gAMA_SUPPORTED)
+void /* PRIVATE */
+png_handle_gAMA(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[4];
+   png_fixed_point igamma;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float file_gamma;
+#endif
+
+   png_debug(1, "in png_handle_gAMA\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before gAMA");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid gAMA after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place gAMA chunk");
+
+   /* A second gAMA is dropped, unless sRGB has been recorded as well */
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA)
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      && !(info_ptr->valid & PNG_INFO_sRGB)
+#endif
+      )
+   {
+      png_warning(png_ptr, "Duplicate gAMA chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 4)
+   {
+      png_warning(png_ptr, "Incorrect gAMA chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, raw, 4);
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* the file stores gamma as an integer scaled by 100000 */
+   igamma = (png_fixed_point)png_get_uint_32(raw);
+   if (igamma == 0)
+   {
+      png_warning(png_ptr, "Ignoring gAMA chunk with gamma=0");
+      return;
+   }
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
+      if (PNG_OUT_OF_RANGE(igamma, 45500L, 500))
+      {
+         png_warning(png_ptr,
+           "Ignoring incorrect gAMA value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+         fprintf(stderr, "gamma = (%d/100000)\n", (int)igamma);
+#endif
+         return;
+      }
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   file_gamma = (float)igamma / (float)100000.0;
+#  ifdef PNG_READ_GAMMA_SUPPORTED
+   png_ptr->gamma = file_gamma;
+#  endif
+   png_set_gAMA(png_ptr, info_ptr, file_gamma);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_gAMA_fixed(png_ptr, info_ptr, igamma);
+#endif
+}
+#endif
+
+#if defined(PNG_READ_sBIT_SUPPORTED)
+void /* PRIVATE */
+png_handle_sBIT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte sig[4];
+   png_size_t expected;
+
+   png_debug(1, "in png_handle_sBIT\n");
+   sig[0] = sig[1] = sig[2] = sig[3] = 0;
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sBIT");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sBIT after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+   {
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place sBIT chunk");
+   }
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT))
+   {
+      png_warning(png_ptr, "Duplicate sBIT chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* one byte per channel, except that palette images always carry three */
+   expected = (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ?
+      (png_size_t)3 : (png_size_t)png_ptr->channels;
+
+   if (length > 4 || length != expected)
+   {
+      png_warning(png_ptr, "Incorrect sBIT chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, sig, expected);
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      /* grayscale: the gray depth is mirrored into the three color slots */
+      png_ptr->sig_bit.gray = sig[0];
+      png_ptr->sig_bit.red = sig[0];
+      png_ptr->sig_bit.green = sig[0];
+      png_ptr->sig_bit.blue = sig[0];
+      png_ptr->sig_bit.alpha = sig[1];
+   }
+   else
+   {
+      png_ptr->sig_bit.red = sig[0];
+      png_ptr->sig_bit.green = sig[1];
+      png_ptr->sig_bit.blue = sig[2];
+      png_ptr->sig_bit.alpha = sig[3];
+   }
+   png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit));
+}
+#endif
+
+#if defined(PNG_READ_cHRM_SUPPORTED)
+void /* PRIVATE */
+png_handle_cHRM(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte buf[4];
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
+#endif
+   png_fixed_point int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue;
+
+   png_uint_32 uint_x, uint_y;
+   /* Reads eight 4-byte values: white, red, green, blue; x then y for each */
+   png_debug(1, "in png_handle_cHRM\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before cHRM");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid cHRM after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place cHRM chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      && !(info_ptr->valid & PNG_INFO_sRGB)
+#endif
+      )
+   {
+      png_warning(png_ptr, "Duplicate cHRM chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 32)
+   {
+      png_warning(png_ptr, "Incorrect cHRM chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x > 80000L || uint_y > 80000L ||
+      uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM white point");
+      png_crc_finish(png_ptr, 24);   /* skip the 24 bytes not yet read */
+      return;
+   }
+   int_x_white = (png_fixed_point)uint_x;
+   int_y_white = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM red point");
+      png_crc_finish(png_ptr, 16);
+      return;
+   }
+   int_x_red = (png_fixed_point)uint_x;
+   int_y_red = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM green point");
+      png_crc_finish(png_ptr, 8);
+      return;
+   }
+   int_x_green = (png_fixed_point)uint_x;
+   int_y_green = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM blue point");
+      png_crc_finish(png_ptr, 0);
+      return;
+   }
+   int_x_blue = (png_fixed_point)uint_x;
+   int_y_blue = (png_fixed_point)uint_y;
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   white_x = (float)int_x_white / (float)100000.0;
+   white_y = (float)int_y_white / (float)100000.0;
+   red_x   = (float)int_x_red   / (float)100000.0;
+   red_y   = (float)int_y_red   / (float)100000.0;
+   green_x = (float)int_x_green / (float)100000.0;
+   green_y = (float)int_y_green / (float)100000.0;
+   blue_x  = (float)int_x_blue  / (float)100000.0;
+   blue_y  = (float)int_y_blue  / (float)100000.0;
+#endif
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   if ((info_ptr != NULL) && (info_ptr->valid & PNG_INFO_sRGB))
+      {
+      if (PNG_OUT_OF_RANGE(int_x_white, 31270,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_white, 32900,  1000) ||
+          PNG_OUT_OF_RANGE(int_x_red,   64000L, 1000) ||
+          PNG_OUT_OF_RANGE(int_y_red,   33000,  1000) ||
+          PNG_OUT_OF_RANGE(int_x_green, 30000,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_green, 60000L, 1000) ||
+          PNG_OUT_OF_RANGE(int_x_blue,  15000,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_blue,   6000,  1000))
+         {
+            png_warning(png_ptr,
+              "Ignoring incorrect cHRM value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+            fprintf(stderr,"wx=%f, wy=%f, rx=%f, ry=%f\n",
+               white_x, white_y, red_x, red_y);
+            fprintf(stderr,"gx=%f, gy=%f, bx=%f, by=%f\n",
+               green_x, green_y, blue_x, blue_y);
+#else
+            fprintf(stderr,"wx=%ld, wy=%ld, rx=%ld, ry=%ld\n",
+               int_x_white, int_y_white, int_x_red, int_y_red);
+            fprintf(stderr,"gx=%ld, gy=%ld, bx=%ld, by=%ld\n",
+               int_x_green, int_y_green, int_x_blue, int_y_blue);
+#endif
+#endif /* PNG_NO_CONSOLE_IO */
+         }
+         png_crc_finish(png_ptr, 0);   /* with sRGB present, cHRM is not stored */
+         return;
+      }
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+   if (png_crc_finish(png_ptr, 0))   /* check the CRC before storing anything */
+      return;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   png_set_cHRM(png_ptr, info_ptr,
+      white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+      int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue);
+#endif
+}
+#endif
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+void /* PRIVATE */
+png_handle_sRGB(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   int intent;
+   png_byte buf[1];
+   /* Reads the one-byte rendering intent and records it for info_ptr */
+   png_debug(1, "in png_handle_sRGB\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sRGB");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sRGB after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place sRGB chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
+   {
+      png_warning(png_ptr, "Duplicate sRGB chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 1)
+   {
+      png_warning(png_ptr, "Incorrect sRGB chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 1);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   intent = buf[0];
+   /* check for bad intent */
+   if (intent >= PNG_sRGB_INTENT_LAST)
+   {
+      png_warning(png_ptr, "Unknown sRGB intent");
+      return;
+   }
+
+#if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA))
+   {
+      png_fixed_point igamma;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      igamma=info_ptr->int_gamma;
+#else
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+      igamma=(png_fixed_point)(info_ptr->gamma * 100000.);
+#  endif
+#endif
+      if (PNG_OUT_OF_RANGE(igamma, 45500L, 500))
+      {
+         png_warning(png_ptr,
+           "Ignoring incorrect gAMA value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+         fprintf(stderr,"incorrect gamma=(%d/100000)\n",(int)igamma);
+#  else
+#    ifdef PNG_FLOATING_POINT_SUPPORTED
+         fprintf(stderr,"incorrect gamma=%f\n",info_ptr->gamma);
+#    endif
+#  endif
+#endif
+      }
+   }
+#endif /* PNG_READ_gAMA_SUPPORTED */
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
+      if (PNG_OUT_OF_RANGE(info_ptr->int_x_white, 31270,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_white, 32900,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_red,   64000L, 1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_red,   33000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_green, 30000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_green, 60000L, 1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_blue,  15000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_blue,   6000,  1000))
+         {
+            png_warning(png_ptr,
+              "Ignoring incorrect cHRM value when sRGB is also present");
+         }
+#endif /* PNG_FIXED_POINT_SUPPORTED */
+#endif /* PNG_READ_cHRM_SUPPORTED */
+
+   png_set_sRGB_gAMA_and_cHRM(png_ptr, info_ptr, intent);
+}
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+#if defined(PNG_READ_iCCP_SUPPORTED)
+void /* PRIVATE */
+png_handle_iCCP(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+{
+   png_charp chunkdata;
+   png_byte compression_type;
+   png_bytep pC;
+   png_charp profile;
+   png_uint_32 skip = 0;
+   png_uint_32 profile_size, profile_length;
+   png_size_t slength, prefix_length, data_length;
+   /* Chunk layout: profile name, NUL, compression byte, compressed profile */
+   png_debug(1, "in png_handle_iCCP\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before iCCP");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid iCCP after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place iCCP chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_iCCP))
+   {
+      png_warning(png_ptr, "Duplicate iCCP chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "iCCP chunk too large to fit in memory");
+      skip = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   chunkdata = (png_charp)png_malloc(png_ptr, length + 1);
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+
+   if (png_crc_finish(png_ptr, skip))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;   /* guarantees the name scan below stops */
+
+   for (profile = chunkdata; *profile; profile++)
+      /* empty loop to find end of name */ ;
+
+   ++profile;
+
+   /* there should be at least one zero (the compression type byte)
+      following the separator, and we should be on it  */
+   if ( profile >= chunkdata + slength - 1)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Malformed iCCP chunk");
+      return;
+   }
+
+   /* compression_type should always be zero */
+   compression_type = *profile++;
+   if (compression_type)
+   {
+      png_warning(png_ptr, "Ignoring nonzero compression type in iCCP chunk");
+      compression_type=0x00;  /* Reset it to zero (libpng-1.0.6 through 1.0.8
+                                 wrote nonzero) */
+   }
+
+   prefix_length = profile - chunkdata;
+   chunkdata = png_decompress_chunk(png_ptr, compression_type, chunkdata,
+                                    slength, prefix_length, &data_length);
+
+   profile_length = data_length - prefix_length;
+
+   if ( prefix_length > data_length || profile_length < 4)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Profile size field missing from iCCP chunk");
+      return;
+   }
+
+   /* Check the profile_size recorded in the first 32 bits of the ICC profile */
+   pC = (png_bytep)(chunkdata+prefix_length);
+   profile_size = ((png_uint_32)(*(pC  ))<<24) |   /* cast before shifting: */
+                  ((png_uint_32)(*(pC+1))<<16) |   /* a promoted int would  */
+                  ((png_uint_32)(*(pC+2))<< 8) |   /* overflow for a top    */
+                  ((png_uint_32)(*(pC+3))    );    /* byte of 0x80 or more  */
+
+   if(profile_size < profile_length)
+      profile_length = profile_size;
+
+   if(profile_size > profile_length)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Ignoring truncated iCCP profile.");
+      return;
+   }
+
+   png_set_iCCP(png_ptr, info_ptr, chunkdata, compression_type,
+                chunkdata + prefix_length, profile_length);
+   png_free(png_ptr, chunkdata);
+}
+#endif /* PNG_READ_iCCP_SUPPORTED */
+
+#if defined(PNG_READ_sPLT_SUPPORTED)
+/* Handle an sPLT chunk.
+ *
+ * The chunk body is read into a temporary buffer.  After the NUL-terminated
+ * palette name comes a sample-depth byte and then a run of entries of 6
+ * bytes each (depth 8) or 10 bytes each (any other depth).  The entries
+ * are unpacked into a freshly allocated png_sPLT_entry array and the
+ * palette is passed to png_set_sPLT().  Both the chunk buffer and the entry
+ * array are released before returning, on success and on every failure
+ * path after they have been allocated.
+ *
+ * png_ptr  - read structure; its mode flags are checked for chunk ordering
+ * info_ptr - info structure given to png_set_sPLT()
+ * length   - chunk data length
+ */
+void /* PRIVATE */
+png_handle_sPLT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+{
+   png_bytep chunkdata;
+   png_bytep entry_start;
+   png_sPLT_t new_palette;
+#ifdef PNG_NO_POINTER_INDEXING
+   png_sPLT_entryp pp;
+#endif
+   int data_length, entry_size, i;
+   png_uint_32 skip = 0;
+   png_size_t slength;
+
+   png_debug(1, "in png_handle_sPLT\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sPLT");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sPLT after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* Read at most 65535 bytes; the remainder is skipped by png_crc_finish */
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "sPLT chunk too large to fit in memory");
+      skip = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   /* One extra byte so the data can be NUL-terminated below */
+   chunkdata = (png_bytep)png_malloc(png_ptr, length + 1);
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+
+   if (png_crc_finish(png_ptr, skip))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (entry_start = chunkdata; *entry_start; entry_start++)
+      /* empty loop to find end of name */ ;
+   ++entry_start;
+
+   /* a sample depth should follow the separator, and we should be on it  */
+   if (entry_start > chunkdata + slength - 2)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "malformed sPLT chunk");
+      return;
+   }
+
+   new_palette.depth = *entry_start++;
+   entry_size = (new_palette.depth == 8 ? 6 : 10);
+   data_length = (slength - (entry_start - chunkdata));
+
+   /* integrity-check the data length */
+   if (data_length % entry_size)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "sPLT chunk has bad length");
+      return;
+   }
+
+   new_palette.nentries = (png_int_32) ( data_length / entry_size);
+   if ((png_uint_32) new_palette.nentries > (png_uint_32) (PNG_SIZE_MAX /
+       png_sizeof(png_sPLT_entry)))
+   {
+       /* The chunk buffer must not outlive this function */
+       png_free(png_ptr, chunkdata);
+       png_warning(png_ptr, "sPLT chunk too long");
+       return;
+   }
+   new_palette.entries = (png_sPLT_entryp)png_malloc_warn(
+       png_ptr, new_palette.nentries * png_sizeof(png_sPLT_entry));
+   if (new_palette.entries == NULL)
+   {
+       /* The chunk buffer must not outlive this function */
+       png_free(png_ptr, chunkdata);
+       png_warning(png_ptr, "sPLT chunk requires too much memory");
+       return;
+   }
+
+   /* Unpack the entries: one byte per sample for depth 8, otherwise two;
+      the frequency is always two bytes */
+#ifndef PNG_NO_POINTER_INDEXING
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+      png_sPLT_entryp pp = new_palette.entries + i;
+
+      if (new_palette.depth == 8)
+      {
+          pp->red = *entry_start++;
+          pp->green = *entry_start++;
+          pp->blue = *entry_start++;
+          pp->alpha = *entry_start++;
+      }
+      else
+      {
+          pp->red   = png_get_uint_16(entry_start); entry_start += 2;
+          pp->green = png_get_uint_16(entry_start); entry_start += 2;
+          pp->blue  = png_get_uint_16(entry_start); entry_start += 2;
+          pp->alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+      pp->frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#else
+   pp = new_palette.entries;
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+
+      if (new_palette.depth == 8)
+      {
+          pp[i].red   = *entry_start++;
+          pp[i].green = *entry_start++;
+          pp[i].blue  = *entry_start++;
+          pp[i].alpha = *entry_start++;
+      }
+      else
+      {
+          pp[i].red   = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].green = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].blue  = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+      /* Index with i here as well: pp itself is never advanced */
+      pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#endif
+
+   /* discard all chunk data except the name and stash that */
+   new_palette.name = (png_charp)chunkdata;
+
+   png_set_sPLT(png_ptr, info_ptr, &new_palette, 1);
+
+   png_free(png_ptr, chunkdata);
+   png_free(png_ptr, new_palette.entries);
+}
+#endif /* PNG_READ_sPLT_SUPPORTED */
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+/* Handle a tRNS chunk.
+ *
+ * What is read depends on png_ptr->color_type: one 16-bit value for gray,
+ * three 16-bit values for RGB, or up to num_palette single bytes for
+ * palette images.  Any other colour type, a wrong length, a duplicate
+ * chunk or a chunk after IDAT produces a warning and the chunk is skipped.
+ * On success the result is passed to png_set_tRNS().
+ */
+void /* PRIVATE */
+png_handle_tRNS(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte trans_bytes[PNG_MAX_PALETTE_LENGTH];
+
+   png_debug(1, "in png_handle_tRNS\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Missing IHDR before tRNS");
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_warning(png_ptr, "Invalid tRNS after IDAT");
+      goto skip_chunk;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0)
+   {
+      png_warning(png_ptr, "Duplicate tRNS chunk");
+      goto skip_chunk;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      png_byte gray_bytes[2];
+
+      if (length != 2)
+      {
+         png_warning(png_ptr, "Incorrect tRNS chunk length");
+         goto skip_chunk;
+      }
+
+      png_crc_read(png_ptr, gray_bytes, 2);
+      png_ptr->num_trans = 1;
+      png_ptr->trans_values.gray = png_get_uint_16(gray_bytes);
+   }
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+   {
+      png_byte rgb_bytes[6];
+
+      if (length != 6)
+      {
+         png_warning(png_ptr, "Incorrect tRNS chunk length");
+         goto skip_chunk;
+      }
+
+      png_crc_read(png_ptr, rgb_bytes, (png_size_t)length);
+      png_ptr->num_trans = 1;
+      png_ptr->trans_values.red = png_get_uint_16(rgb_bytes);
+      png_ptr->trans_values.green = png_get_uint_16(rgb_bytes + 2);
+      png_ptr->trans_values.blue = png_get_uint_16(rgb_bytes + 4);
+   }
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      /* Should be an error, but we can cope with it. */
+      if ((png_ptr->mode & PNG_HAVE_PLTE) == 0)
+         png_warning(png_ptr, "Missing PLTE before tRNS");
+
+      if (length > (png_uint_32)png_ptr->num_palette ||
+          length > PNG_MAX_PALETTE_LENGTH)
+      {
+         png_warning(png_ptr, "Incorrect tRNS chunk length");
+         goto skip_chunk;
+      }
+
+      if (length == 0)
+      {
+         png_warning(png_ptr, "Zero length tRNS chunk");
+         goto skip_chunk;
+      }
+
+      png_crc_read(png_ptr, trans_bytes, (png_size_t)length);
+      png_ptr->num_trans = (png_uint_16)length;
+   }
+   else
+   {
+      png_warning(png_ptr, "tRNS chunk not allowed with alpha channel");
+      goto skip_chunk;
+   }
+
+   /* A failed CRC check invalidates what was just stored */
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_ptr->num_trans = 0;
+      return;
+   }
+
+   png_set_tRNS(png_ptr, info_ptr, trans_bytes, png_ptr->num_trans,
+      &(png_ptr->trans_values));
+   return;
+
+skip_chunk:
+   /* Common exit for rejected chunks: discard the unread chunk data */
+   png_crc_finish(png_ptr, length);
+}
+#endif
+
+#if defined(PNG_READ_bKGD_SUPPORTED)
+/* Handle a bKGD chunk.
+ *
+ * The expected length is 1 byte for palette images, 6 bytes when the
+ * colour type has PNG_COLOR_MASK_COLOR set, and 2 bytes otherwise.  The
+ * decoded colour is stored in png_ptr->background and passed to
+ * png_set_bKGD().  Out-of-order, duplicate or wrongly sized chunks are
+ * skipped with a warning.
+ *
+ * png_ptr  - read structure; mode, color_type and palette are consulted
+ * info_ptr - info structure given to png_set_bKGD(); may be NULL for the
+ *            checks made here
+ * length   - chunk data length
+ */
+void /* PRIVATE */
+png_handle_bKGD(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_size_t truelen;
+   png_byte buf[6];
+
+   png_debug(1, "in png_handle_bKGD\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before bKGD");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid bKGD after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+            !(png_ptr->mode & PNG_HAVE_PLTE))
+   {
+      png_warning(png_ptr, "Missing PLTE before bKGD");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD))
+   {
+      png_warning(png_ptr, "Duplicate bKGD chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      truelen = 1;
+   else if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      truelen = 6;
+   else
+      truelen = 2;
+
+   if (length != truelen)
+   {
+      png_warning(png_ptr, "Incorrect bKGD chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, truelen);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   /* We convert the index value into RGB components so that we can allow
+    * arbitrary RGB values for background when we have transparency, and
+    * so it is easy to determine the RGB values of the background color
+    * from the info_ptr struct. */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      png_ptr->background.index = buf[0];
+      if (info_ptr && info_ptr->num_palette)
+      {
+          /* Valid indices are 0 .. num_palette-1; an index equal to
+           * num_palette would read one entry past the palette. */
+          if(buf[0] >= info_ptr->num_palette)
+          {
+             png_warning(png_ptr, "Incorrect bKGD chunk index value");
+             return;
+          }
+          png_ptr->background.red =
+             (png_uint_16)png_ptr->palette[buf[0]].red;
+          png_ptr->background.green =
+             (png_uint_16)png_ptr->palette[buf[0]].green;
+          png_ptr->background.blue =
+             (png_uint_16)png_ptr->palette[buf[0]].blue;
+      }
+   }
+   else if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) /* GRAY */
+   {
+      /* The single gray level is replicated into all three colour fields */
+      png_ptr->background.red =
+      png_ptr->background.green =
+      png_ptr->background.blue =
+      png_ptr->background.gray = png_get_uint_16(buf);
+   }
+   else
+   {
+      png_ptr->background.red = png_get_uint_16(buf);
+      png_ptr->background.green = png_get_uint_16(buf + 2);
+      png_ptr->background.blue = png_get_uint_16(buf + 4);
+   }
+
+   png_set_bKGD(png_ptr, info_ptr, &(png_ptr->background));
+}
+#endif
+
+#if defined(PNG_READ_hIST_SUPPORTED)
+/* Handle a hIST chunk.
+ *
+ * The chunk must come after PLTE and before IDAT and must hold exactly one
+ * 16-bit value per palette entry.  The values are read two bytes at a time
+ * into a local array that is then passed to png_set_hIST().  Anything
+ * else is skipped with a warning.
+ */
+void /* PRIVATE */
+png_handle_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_uint_16 hist[PNG_MAX_PALETTE_LENGTH];
+   unsigned int entries, idx;
+
+   png_debug(1, "in png_handle_hIST\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Missing IHDR before hIST");
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_warning(png_ptr, "Invalid hIST after IDAT");
+      goto skip_chunk;
+   }
+   else if ((png_ptr->mode & PNG_HAVE_PLTE) == 0)
+   {
+      png_warning(png_ptr, "Missing PLTE before hIST");
+      goto skip_chunk;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0)
+   {
+      png_warning(png_ptr, "Duplicate hIST chunk");
+      goto skip_chunk;
+   }
+
+   /* Two bytes per entry */
+   entries = length / 2 ;
+   if (entries > (unsigned int) PNG_MAX_PALETTE_LENGTH ||
+       entries != (unsigned int) png_ptr->num_palette)
+   {
+      png_warning(png_ptr, "Incorrect hIST chunk length");
+      goto skip_chunk;
+   }
+
+   idx = 0;
+   while (idx < entries)
+   {
+      png_byte pair[2];
+
+      png_crc_read(png_ptr, pair, 2);
+      hist[idx] = png_get_uint_16(pair);
+      idx++;
+   }
+
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   png_set_hIST(png_ptr, info_ptr, hist);
+   return;
+
+skip_chunk:
+   /* Common exit for rejected chunks: discard the unread chunk data */
+   png_crc_finish(png_ptr, length);
+}
+#endif
+
+#if defined(PNG_READ_pHYs_SUPPORTED)
+/* Handle a pHYs chunk.
+ *
+ * The chunk must be exactly 9 bytes: two 32-bit unsigned values followed
+ * by a one-byte unit type.  They are decoded and passed to png_set_pHYs().
+ * A chunk after IDAT, a duplicate or a wrongly sized chunk is skipped with
+ * a warning.
+ */
+void /* PRIVATE */
+png_handle_pHYs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[9];
+   png_uint_32 x_res, y_res;
+   int unit;
+
+   png_debug(1, "in png_handle_pHYs\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Missing IHDR before pHYs");
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_warning(png_ptr, "Invalid pHYs after IDAT");
+      goto skip_chunk;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_warning(png_ptr, "Duplicate pHYs chunk");
+      goto skip_chunk;
+   }
+
+   if (length != 9)
+   {
+      png_warning(png_ptr, "Incorrect pHYs chunk length");
+      goto skip_chunk;
+   }
+
+   png_crc_read(png_ptr, raw, 9);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   x_res = png_get_uint_32(raw);
+   y_res = png_get_uint_32(raw + 4);
+   unit = raw[8];
+   png_set_pHYs(png_ptr, info_ptr, x_res, y_res, unit);
+   return;
+
+skip_chunk:
+   /* Common exit for rejected chunks: discard the unread chunk data */
+   png_crc_finish(png_ptr, length);
+}
+#endif
+
+#if defined(PNG_READ_oFFs_SUPPORTED)
+/* Handle an oFFs chunk.
+ *
+ * The chunk must be exactly 9 bytes: two 32-bit signed values followed by
+ * a one-byte unit type.  They are decoded and passed to png_set_oFFs().
+ * A chunk after IDAT, a duplicate or a wrongly sized chunk is skipped with
+ * a warning.
+ */
+void /* PRIVATE */
+png_handle_oFFs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[9];
+   png_int_32 x_off, y_off;
+   int unit;
+
+   png_debug(1, "in png_handle_oFFs\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Missing IHDR before oFFs");
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_warning(png_ptr, "Invalid oFFs after IDAT");
+      goto skip_chunk;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_warning(png_ptr, "Duplicate oFFs chunk");
+      goto skip_chunk;
+   }
+
+   if (length != 9)
+   {
+      png_warning(png_ptr, "Incorrect oFFs chunk length");
+      goto skip_chunk;
+   }
+
+   png_crc_read(png_ptr, raw, 9);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   x_off = png_get_int_32(raw);
+   y_off = png_get_int_32(raw + 4);
+   unit = raw[8];
+   png_set_oFFs(png_ptr, info_ptr, x_off, y_off, unit);
+   return;
+
+skip_chunk:
+   /* Common exit for rejected chunks: discard the unread chunk data */
+   png_crc_finish(png_ptr, length);
+}
+#endif
+
+#if defined(PNG_READ_pCAL_SUPPORTED)
+/* read the pCAL chunk (described in the PNG Extensions document)
+ *
+ * The whole chunk is read into one temporary buffer.  Located inside it,
+ * in order, are the NUL-terminated purpose string, X0, X1, the equation
+ * type, the parameter count, the NUL-terminated units string and then one
+ * string per parameter.  Pointers to those pieces are passed to
+ * png_set_pCAL(), after which the buffer and the pointer array are freed.
+ * A chunk after IDAT or a duplicate is skipped with a warning; malformed
+ * data is reported with a warning and discarded.
+ */
+void /* PRIVATE */
+png_handle_pCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_charp purpose;
+   png_int_32 X0, X1;
+   png_byte type, nparams;
+   png_charp buf, units, endptr;
+   png_charpp params;
+   png_size_t slength;
+   int i;
+
+   png_debug(1, "in png_handle_pCAL\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before pCAL");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid pCAL after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL))
+   {
+      png_warning(png_ptr, "Duplicate pCAL chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading pCAL chunk data (%lu bytes)\n",
+      length + 1);
+   purpose = (png_charp)png_malloc_warn(png_ptr, length + 1); /* one extra
+      byte for the terminator written after the read */
+   if (purpose == NULL)
+     {
+       png_warning(png_ptr, "No memory for pCAL purpose.");
+       return; /* NOTE(review): png_crc_finish() is not called on this
+                  path, so the chunk data is left unread - confirm the
+                  caller copes with that */
+     }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)purpose, slength);
+
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, purpose);
+      return;
+   }
+
+   purpose[slength] = 0x00; /* null terminate the last string */
+
+   png_debug(3, "Finding end of pCAL purpose string\n");
+   for (buf = purpose; *buf; buf++)
+      /* empty loop */ ;
+
+   endptr = purpose + slength; /* address of the terminator written above */
+
+   /* We need to have at least 12 bytes after the purpose string
+      in order to get the parameter information. */
+   if (endptr <= buf + 12)
+   {
+      png_warning(png_ptr, "Invalid pCAL data");
+      png_free(png_ptr, purpose);
+      return;
+   }
+
+   /* buf is on the NUL that ends the purpose string, so the fixed fields
+      start at buf+1 */
+   png_debug(3, "Reading pCAL X0, X1, type, nparams, and units\n");
+   X0 = png_get_int_32((png_bytep)buf+1);
+   X1 = png_get_int_32((png_bytep)buf+5);
+   type = buf[9];
+   nparams = buf[10];
+   units = buf + 11;
+
+   png_debug(3, "Checking pCAL equation type and number of parameters\n");
+   /* Check that we have the right number of parameters for known
+      equation types. */
+   if ((type == PNG_EQUATION_LINEAR && nparams != 2) ||
+       (type == PNG_EQUATION_BASE_E && nparams != 3) ||
+       (type == PNG_EQUATION_ARBITRARY && nparams != 3) ||
+       (type == PNG_EQUATION_HYPERBOLIC && nparams != 4))
+   {
+      png_warning(png_ptr, "Invalid pCAL parameters for equation type");
+      png_free(png_ptr, purpose);
+      return;
+   }
+   else if (type >= PNG_EQUATION_LAST)
+   {
+      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk"); /*
+         warning only: processing continues */
+   }
+
+   for (buf = units; *buf; buf++)
+      /* Empty loop to move past the units string. */ ;
+
+   png_debug(3, "Allocating pCAL parameters array\n");
+   params = (png_charpp)png_malloc_warn(png_ptr, (png_uint_32)(nparams
+      *png_sizeof(png_charp))) ;
+   if (params == NULL)
+     {
+       png_free(png_ptr, purpose);
+       png_warning(png_ptr, "No memory for pCAL params.");
+       return;
+     }
+
+   /* Get pointers to the start of each parameter string. */
+   for (i = 0; i < (int)nparams; i++)
+   {
+      buf++; /* Skip the null string terminator from previous parameter. */
+
+      png_debug1(3, "Reading pCAL parameter %d\n", i);
+      for (params[i] = buf; buf <= endptr && *buf != 0x00; buf++)
+         /* Empty loop to move past each parameter string */ ;
+
+      /* Make sure we haven't run out of data yet */
+      if (buf > endptr)
+      {
+         png_warning(png_ptr, "Invalid pCAL data");
+         png_free(png_ptr, purpose);
+         png_free(png_ptr, params);
+         return;
+      }
+   }
+
+   /* purpose, units and every params[i] point into the one chunk buffer */
+   png_set_pCAL(png_ptr, info_ptr, purpose, X0, X1, type, nparams,
+      units, params);
+
+   png_free(png_ptr, purpose); /* presumably png_set_pCAL() has copied what
+      it keeps - confirm against its implementation */
+   png_free(png_ptr, params);
+}
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED)
+/* read the sCAL chunk
+ *
+ * The chunk is read into a temporary buffer laid out as a unit byte, the
+ * width as text, a NUL separator and the height as text.  With
+ * PNG_FLOATING_POINT_SUPPORTED both numbers are parsed with png_strtod()
+ * and passed to png_set_sCAL(); otherwise, with PNG_FIXED_POINT_SUPPORTED,
+ * NUL-terminated copies of both strings are passed to png_set_sCAL_s().
+ * Every allocation made here is released on every return path that
+ * follows it.
+ *
+ * png_ptr  - read structure; its mode flags are checked for chunk ordering
+ * info_ptr - info structure given to png_set_sCAL() / png_set_sCAL_s()
+ * length   - chunk data length
+ */
+void /* PRIVATE */
+png_handle_sCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_charp buffer, ep;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double width, height;
+   png_charp vp;
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp swidth, sheight;
+#endif
+#endif
+   png_size_t slength;
+
+   png_debug(1, "in png_handle_sCAL\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sCAL");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sCAL after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL))
+   {
+      png_warning(png_ptr, "Duplicate sCAL chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading sCAL chunk data (%lu bytes)\n",
+      length + 1);
+   buffer = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (buffer == NULL)
+   {
+      png_warning(png_ptr, "Out of memory while processing sCAL chunk");
+      return;
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)buffer, slength);
+
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, buffer);
+      return;
+   }
+
+   /* An empty chunk has no unit byte; buffer + 1 would then point past the
+      one-byte allocation, so stop before it is dereferenced */
+   if (slength == 0)
+   {
+      png_warning(png_ptr, "Truncated sCAL chunk");
+      png_free(png_ptr, buffer);
+      return;
+   }
+
+   buffer[slength] = 0x00; /* null terminate the last string */
+
+   ep = buffer + 1;        /* skip unit byte */
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   width = png_strtod(png_ptr, ep, &vp);
+   if (*vp)
+   {
+      png_warning(png_ptr, "malformed width string in sCAL chunk");
+      png_free(png_ptr, buffer);
+      return;
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   swidth = (png_charp)png_malloc_warn(png_ptr, png_strlen(ep) + 1);
+   if (swidth == NULL)
+   {
+      png_warning(png_ptr, "Out of memory while processing sCAL chunk width");
+      png_free(png_ptr, buffer);
+      return;
+   }
+   /* Copy the terminating NUL too, so swidth is a proper string */
+   png_memcpy(swidth, ep, (png_size_t)png_strlen(ep) + 1);
+#endif
+#endif
+
+   /* Move to the byte after the first NUL in the buffer: the height */
+   for (ep = buffer; *ep; ep++)
+      /* empty loop */ ;
+   ep++;
+
+   if (buffer + slength < ep)
+   {
+      png_warning(png_ptr, "Truncated sCAL chunk");
+#if defined(PNG_FIXED_POINT_SUPPORTED) && \
+    !defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_free(png_ptr, swidth);
+#endif
+      png_free(png_ptr, buffer);
+      return;
+   }
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   height = png_strtod(png_ptr, ep, &vp);
+   if (*vp)
+   {
+      png_warning(png_ptr, "malformed height string in sCAL chunk");
+      png_free(png_ptr, buffer);
+      return;
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   sheight = (png_charp)png_malloc_warn(png_ptr, png_strlen(ep) + 1);
+   if (sheight == NULL)
+   {
+      png_warning(png_ptr, "Out of memory while processing sCAL chunk height");
+      png_free(png_ptr, swidth);
+      png_free(png_ptr, buffer);
+      return;
+   }
+   /* Copy the terminating NUL too, so sheight is a proper string */
+   png_memcpy(sheight, ep, (png_size_t)png_strlen(ep) + 1);
+#endif
+#endif
+
+   if (buffer + slength < ep
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      || width <= 0. || height <= 0.
+#endif
+      )
+   {
+      png_warning(png_ptr, "Invalid sCAL data");
+      png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_free(png_ptr, swidth);
+      png_free(png_ptr, sheight);
+#endif
+      return;
+   }
+
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   png_set_sCAL(png_ptr, info_ptr, buffer[0], width, height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_sCAL_s(png_ptr, info_ptr, buffer[0], swidth, sheight);
+#endif
+#endif
+
+   png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+   png_free(png_ptr, swidth);
+   png_free(png_ptr, sheight);
+#endif
+}
+#endif
+
+#if defined(PNG_READ_tIME_SUPPORTED)
+/* Handle a tIME chunk.
+ *
+ * The chunk must be exactly 7 bytes: a 16-bit year followed by one byte
+ * each for month, day, hour, minute and second.  The decoded value is
+ * passed to png_set_tIME().  If IDAT has already been seen,
+ * PNG_AFTER_IDAT is set in the mode flags.  A duplicate or wrongly sized
+ * chunk is skipped with a warning.
+ */
+void /* PRIVATE */
+png_handle_tIME(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[7];
+   png_time stamp;
+
+   png_debug(1, "in png_handle_tIME\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Out of place tIME chunk");
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME) != 0)
+   {
+      png_warning(png_ptr, "Duplicate tIME chunk");
+      goto skip_chunk;
+   }
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+   if (length != 7)
+   {
+      png_warning(png_ptr, "Incorrect tIME chunk length");
+      goto skip_chunk;
+   }
+
+   png_crc_read(png_ptr, raw, 7);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   stamp.year = png_get_uint_16(raw);
+   stamp.month = raw[2];
+   stamp.day = raw[3];
+   stamp.hour = raw[4];
+   stamp.minute = raw[5];
+   stamp.second = raw[6];
+
+   png_set_tIME(png_ptr, info_ptr, &stamp);
+   return;
+
+skip_chunk:
+   /* Common exit for rejected chunks: discard the unread chunk data */
+   png_crc_finish(png_ptr, length);
+}
+#endif
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+/* Handle a tEXt chunk.
+ *
+ * The chunk is read into one buffer holding the keyword, a NUL separator
+ * and the text.  A temporary png_text record pointing into that buffer is
+ * passed to png_set_text_2(), after which both the buffer and the record
+ * are freed.
+ *
+ * Note: this does not properly handle chunks that are > 64K under DOS
+ */
+void /* PRIVATE */
+png_handle_tEXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_charp keyword;
+   png_charp value;
+   png_textp entry;
+   png_size_t nbytes;
+   png_uint_32 excess = 0;
+   int failed;
+
+   png_debug(1, "in png_handle_tEXt\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_error(png_ptr, "Missing IHDR before tEXt");
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* Read at most 65535 bytes; the remainder is skipped by png_crc_finish */
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "tEXt chunk too large to fit in memory");
+      excess = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   /* One extra byte so the data can be NUL-terminated below */
+   keyword = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (keyword == NULL)
+   {
+      png_warning(png_ptr, "No memory to process text chunk.");
+      return;
+   }
+
+   nbytes = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)keyword, nbytes);
+
+   if (png_crc_finish(png_ptr, excess))
+   {
+      png_free(png_ptr, keyword);
+      return;
+   }
+
+   keyword[nbytes] = 0x00;
+
+   /* Walk to the NUL that ends the keyword */
+   value = keyword;
+   while (*value)
+      value++;
+
+   /* Step over the separator unless it is the terminator added above, in
+      which case the text is empty */
+   if (value != keyword + nbytes)
+      value++;
+
+   entry = (png_textp)png_malloc_warn(png_ptr,
+      (png_uint_32)png_sizeof(png_text));
+   if (entry == NULL)
+   {
+      png_warning(png_ptr, "Not enough memory to process text chunk.");
+      png_free(png_ptr, keyword);
+      return;
+   }
+
+   entry->key = keyword;
+   entry->text = value;
+   entry->text_length = png_strlen(value);
+   entry->compression = PNG_TEXT_COMPRESSION_NONE;
+#ifdef PNG_iTXt_SUPPORTED
+   entry->itxt_length = 0;
+   entry->lang_key = NULL;
+   entry->lang = NULL;
+#endif
+
+   failed = png_set_text_2(png_ptr, info_ptr, entry, 1);
+
+   png_free(png_ptr, keyword);
+   png_free(png_ptr, entry);
+   if (failed)
+      png_warning(png_ptr, "Insufficient memory to process text chunk.");
+}
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+/* note: this does not correctly handle chunks that are > 64K under DOS
+ *
+ * Handle a zTXt chunk.  The chunk is read into one buffer holding the
+ * keyword, a NUL separator, a compression-type byte and the remaining
+ * data.  The buffer is passed through png_decompress_chunk(); a temporary
+ * png_text record pointing into the returned buffer is then passed to
+ * png_set_text_2(), after which the record and the buffer are freed.
+ * Unlike png_handle_tEXt(), a failure reported by png_set_text_2() is
+ * raised with png_error() rather than png_warning().
+ */
+void /* PRIVATE */
+png_handle_zTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_textp text_ptr;
+   png_charp chunkdata;
+   png_charp text;
+   int comp_type;
+   int ret;
+   png_size_t slength, prefix_len, data_len;
+
+   png_debug(1, "in png_handle_zTXt\n");
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before zTXt");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* We will no doubt have problems with chunks even half this size, but
+      there is no hard and fast rule to tell us where to stop. */
+   if (length > (png_uint_32)65535L)
+   {
+     png_warning(png_ptr,"zTXt chunk too large to fit in memory");
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+#endif
+
+   chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); /* one extra
+      byte for the terminator written after the read */
+   if (chunkdata == NULL)
+   {
+     png_warning(png_ptr,"Out of memory processing zTXt chunk.");
+     return; /* NOTE(review): png_crc_finish() is not called on this path,
+                so the chunk data is left unread - confirm the caller
+                copes with that */
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (text = chunkdata; *text; text++)
+      /* empty loop */ ;
+
+   /* zTXt must have some text after the chunkdataword */
+   if (text >= chunkdata + slength - 2)
+   {
+      png_warning(png_ptr, "Truncated zTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+   else
+   {
+       comp_type = *(++text); /* byte right after the keyword's NUL */
+       if (comp_type != PNG_TEXT_COMPRESSION_zTXt)
+       {
+          png_warning(png_ptr, "Unknown compression type in zTXt chunk");
+          comp_type = PNG_TEXT_COMPRESSION_zTXt; /* carry on as if the
+             expected type had been given */
+       }
+       text++;        /* skip the compression_method byte */
+   }
+   prefix_len = text - chunkdata; /* keyword, its NUL and the type byte */
+
+   /* chunkdata is replaced by whatever png_decompress_chunk() returns */
+   chunkdata = (png_charp)png_decompress_chunk(png_ptr, comp_type, chunkdata,
+                                    (png_size_t)length, prefix_len, &data_len);
+
+   text_ptr = (png_textp)png_malloc_warn(png_ptr,
+     (png_uint_32)png_sizeof(png_text));
+   if (text_ptr == NULL)
+   {
+     png_warning(png_ptr,"Not enough memory to process zTXt chunk.");
+     png_free(png_ptr, chunkdata);
+     return;
+   }
+   text_ptr->compression = comp_type;
+   text_ptr->key = chunkdata;
+#ifdef PNG_iTXt_SUPPORTED
+   text_ptr->lang = NULL;
+   text_ptr->lang_key = NULL;
+   text_ptr->itxt_length = 0;
+#endif
+   text_ptr->text = chunkdata + prefix_len;
+   text_ptr->text_length = data_len;
+
+   ret=png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+   png_free(png_ptr, text_ptr);
+   png_free(png_ptr, chunkdata);
+   if (ret)
+     png_error(png_ptr, "Insufficient memory to store zTXt chunk.");
+}
+#endif
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+/* note: this does not correctly handle chunks that are > 64K under DOS
+ *
+ * Handle an iTXt chunk.  The chunk is read into one buffer holding, in
+ * order: the keyword, a NUL, a compression flag byte, a compression type
+ * byte, the language tag, a NUL, the translated keyword, a NUL and the
+ * text.  When the compression flag is nonzero the buffer is passed through
+ * png_decompress_chunk().  A temporary png_text record pointing into the
+ * resulting buffer is passed to png_set_text_2(), after which the record
+ * and the buffer are freed.  A failure reported by png_set_text_2() is
+ * raised with png_error().
+ */
+void /* PRIVATE */
+png_handle_iTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_textp text_ptr;
+   png_charp chunkdata;
+   png_charp key, lang, text, lang_key;
+   int comp_flag;
+   int comp_type = 0;
+   int ret;
+   png_size_t slength, prefix_len, data_len;
+
+   png_debug(1, "in png_handle_iTXt\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before iTXt");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* We will no doubt have problems with chunks even half this size, but
+      there is no hard and fast rule to tell us where to stop. */
+   if (length > (png_uint_32)65535L)
+   {
+     png_warning(png_ptr,"iTXt chunk too large to fit in memory");
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+#endif
+
+   chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); /* one extra
+      byte for the terminator written after the read */
+   if (chunkdata == NULL)
+   {
+     png_warning(png_ptr, "No memory to process iTXt chunk.");
+     return; /* NOTE(review): png_crc_finish() is not called on this path,
+                so the chunk data is left unread - confirm the caller
+                copes with that */
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (lang = chunkdata; *lang; lang++)
+      /* empty loop */ ;
+   lang++;        /* skip NUL separator */
+
+   /* iTXt must have a language tag (possibly empty), two compression bytes,
+      translated keyword (possibly empty), and possibly some text after the
+      keyword */
+
+   if (lang >= chunkdata + slength - 3)
+   {
+      png_warning(png_ptr, "Truncated iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+   else
+   {
+       comp_flag = *lang++;
+       comp_type = *lang++; /* lang now points at the language tag */
+   }
+
+   for (lang_key = lang; *lang_key; lang_key++)
+      /* empty loop */ ;
+   lang_key++;        /* skip NUL separator */
+
+   if (lang_key >= chunkdata + slength)
+   {
+      png_warning(png_ptr, "Truncated iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   for (text = lang_key; *text; text++)
+      /* empty loop */ ;
+   text++;        /* skip NUL separator */
+   if (text >= chunkdata + slength)
+   {
+      png_warning(png_ptr, "Malformed iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   prefix_len = text - chunkdata; /* everything that precedes the text */
+
+   key=chunkdata; /* remember the original base: chunkdata may be reassigned
+                     just below, and lang / lang_key are re-expressed as
+                     offsets from this saved pointer afterwards */
+   if (comp_flag)
+       chunkdata = png_decompress_chunk(png_ptr, comp_type, chunkdata,
+          (size_t)length, prefix_len, &data_len);
+   else
+       data_len=png_strlen(chunkdata + prefix_len);
+   text_ptr = (png_textp)png_malloc_warn(png_ptr,
+      (png_uint_32)png_sizeof(png_text));
+   if (text_ptr == NULL)
+   {
+     png_warning(png_ptr,"Not enough memory to process iTXt chunk.");
+     png_free(png_ptr, chunkdata);
+     return;
+   }
+   text_ptr->compression = (int)comp_flag + 1;
+   text_ptr->lang_key = chunkdata+(lang_key-key);
+   text_ptr->lang = chunkdata+(lang-key);
+   text_ptr->itxt_length = data_len;
+   text_ptr->text_length = 0;
+   text_ptr->key = chunkdata;
+   text_ptr->text = chunkdata + prefix_len;
+
+   ret=png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+   png_free(png_ptr, text_ptr);
+   png_free(png_ptr, chunkdata);
+   if (ret)
+     png_error(png_ptr, "Insufficient memory to store iTXt chunk.");
+}
+#endif
+
+/* Fallback handler, called when no specific handler exists for a chunk.
+   The chunk name is validated first; an unknown critical chunk is an
+   error unless it is registered PNG_HANDLE_CHUNK_ALWAYS or a user chunk
+   callback is installed.  The body is skipped unless unknown chunks are
+   kept (PNG_FLAG_KEEP_UNKNOWN_CHUNKS) or a callback exists to receive it. */
+void /* PRIVATE */
+png_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_uint_32 skip = 0;   /* byte count handed to png_crc_finish() below */
+
+   png_debug(1, "in png_handle_unknown\n");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IDAT;
+#endif
+      if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))  /* not an IDAT */
+         png_ptr->mode |= PNG_AFTER_IDAT;
+   }
+
+   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+
+   if (!(png_ptr->chunk_name[0] & 0x20))   /* 0x20 clear: critical chunk */
+   {
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+      if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+           PNG_HANDLE_CHUNK_ALWAYS
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+           && png_ptr->read_user_chunk_fn == NULL
+#endif
+        )
+#endif
+          png_chunk_error(png_ptr, "unknown critical chunk");
+   }
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if ((png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS) ||
+       (png_ptr->read_user_chunk_fn != NULL))
+   {
+#ifdef PNG_MAX_MALLOC_64K
+       if (length > (png_uint_32)65535L)   /* keep 65535 bytes, skip the rest */
+       {
+           png_warning(png_ptr, "unknown chunk too large to fit in memory");
+           skip = length - (png_uint_32)65535L;
+           length = (png_uint_32)65535L;
+       }
+#endif
+       png_memcpy((png_charp)png_ptr->unknown_chunk.name,
+                  (png_charp)png_ptr->chunk_name, 
+                  png_sizeof(png_ptr->unknown_chunk.name));
+       png_ptr->unknown_chunk.name[png_sizeof(png_ptr->unknown_chunk.name)-1] = '\0';
+       png_ptr->unknown_chunk.size = (png_size_t)length;
+       if (length == 0)
+         png_ptr->unknown_chunk.data = NULL;   /* empty chunk: nothing to read */
+       else
+       {
+         png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
+         png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
+       }
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+       if(png_ptr->read_user_chunk_fn != NULL)
+       {
+          /* user callback: < 0 is an error, 0 means "not handled by user" */
+          int ret;
+          ret = (*(png_ptr->read_user_chunk_fn))
+            (png_ptr, &png_ptr->unknown_chunk);
+          if (ret < 0)
+             png_chunk_error(png_ptr, "error in user chunk");
+          if (ret == 0)
+          {
+             if (!(png_ptr->chunk_name[0] & 0x20))   /* critical, unhandled */
+                if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+                     PNG_HANDLE_CHUNK_ALWAYS)
+                   png_chunk_error(png_ptr, "unknown critical chunk");
+             png_set_unknown_chunks(png_ptr, info_ptr,
+               &png_ptr->unknown_chunk, 1);
+          }
+       }
+       else
+#endif
+       png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1);
+       png_free(png_ptr, png_ptr->unknown_chunk.data);
+       png_ptr->unknown_chunk.data = NULL;   /* avoid a dangling pointer */
+   }
+   else
+#endif
+      skip = length;   /* chunk not kept: whole body goes to png_crc_finish */
+
+   png_crc_finish(png_ptr, skip);
+
+#if !defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+   info_ptr = info_ptr; /* quiet compiler warnings about unused info_ptr */
+#endif
+}
+
+/* Validates a chunk name: each of its four bytes must be an ASCII letter,
+   otherwise png_chunk_error() is raised.  The "critical chunk" test is
+   deliberately kept out of this routine so that callers can validate the
+   name first and only afterwards decide (possibly via user functions)
+   what to do with an unknown critical chunk. */
+
+#define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
+
+void /* PRIVATE */
+png_check_chunk_name(png_structp png_ptr, png_bytep chunk_name)
+{
+   int pos, bad = 0;   /* scan stops at the first non-letter byte */
+   png_debug(1, "in png_check_chunk_name\n");
+   for (pos = 0; pos < 4 && !bad; pos++)
+      bad = isnonalpha(chunk_name[pos]);
+   if (bad)
+      png_chunk_error(png_ptr, "invalid chunk type");
+}
+
+/* Merges the row just read (png_ptr->row_buf + 1) into the caller's row.
+   The mask selects which pixels are copied; it is applied as a pattern
+   that repeats every 8 pixels, starting with its most significant bit.
+   A one bit means the pixel is copied from the new row, a zero bit means
+   the destination pixel is left untouched.  Passing 0xff (255) copies the
+   whole row with a single png_memcpy.  Pixel depths of 1, 2 and 4 bits
+   are merged by shifting and masking within each byte (bit order follows
+   PNG_PACKSWAP when that transformation is set); all other depths are
+   copied pixel_depth >> 3 bytes at a time.  Per the original note, the
+   mask value selects between the two progressive-display methods.  */
+
+void /* PRIVATE */
+png_combine_row(png_structp png_ptr, png_bytep row, int mask)
+{
+   png_debug(1,"in png_combine_row\n");
+   if (mask == 0xff)
+   {
+      png_memcpy(row, png_ptr->row_buf + 1,
+         PNG_ROWBYTES(png_ptr->row_info.pixel_depth, png_ptr->width));
+   }
+   else
+   {
+      switch (png_ptr->row_info.pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            int s_inc, s_start, s_end;
+            int m = 0x80;   /* current mask bit, most significant first */
+            int shift;
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+            else
+#endif
+            {
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  int value;
+
+                  value = (*sp >> shift) & 0x01;   /* source pixel */
+                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);   /* after clearing it */
+               }
+
+               if (shift == s_end)   /* byte exhausted: move to the next one */
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+
+               if (m == 1)   /* mask pattern wraps every 8 pixels */
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            int s_start, s_end, s_inc;
+            int m = 0x80;
+            int shift;
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            int value;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+            else
+#endif
+            {
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0x03;   /* two-bit source pixel */
+                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            int s_start, s_end, s_inc;
+            int m = 0x80;
+            int shift;
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            int value;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+            else
+#endif
+            {
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0xf;   /* four-bit source pixel */
+                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         default:   /* every depth other than 1, 2 or 4: copy whole bytes */
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            png_size_t pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            png_byte m = 0x80;
+
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  png_memcpy(dp, sp, pixel_bytes);
+               }
+
+               sp += pixel_bytes;
+               dp += pixel_bytes;
+
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+      }
+   }
+}
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+/* Expands the current pass's row in place to its final width by writing
+   each pixel png_pass_inc[pass] times, working from the end of the row
+   backwards.  (OLD pre-1.0.9 interface: png_do_read_interlace(row_info,
+   row, pass, transformations).) */
+void /* PRIVATE */
+png_do_read_interlace(png_structp png_ptr)
+{
+   png_row_infop row_info = &(png_ptr->row_info);
+   png_bytep row = png_ptr->row_buf + 1;
+   int pass = png_ptr->pass;
+   png_uint_32 transformations = png_ptr->transformations;
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+#endif
+
+   png_debug(1,"in png_do_read_interlace\n");
+   if (row != NULL && row_info != NULL)
+   {
+      png_uint_32 final_width;   /* row width after replication */
+
+      final_width = row_info->width * png_pass_inc[pass];
+
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 3);
+            png_bytep dp = row + (png_size_t)((final_width - 1) >> 3);
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            int jstop = png_pass_inc[pass];   /* copies written per pixel */
+            png_byte v;
+            png_uint_32 i;
+            int j;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+                sshift = (int)((row_info->width + 7) & 0x07);
+                dshift = (int)((final_width + 7) & 0x07);
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+            else
+#endif
+            {
+                sshift = 7 - (int)((row_info->width + 7) & 0x07);
+                dshift = 7 - (int)((final_width + 7) & 0x07);
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               v = (png_byte)((*sp >> sshift) & 0x01);   /* source pixel */
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)   /* byte done: step to previous byte */
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2);
+            png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2);
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            int jstop = png_pass_inc[pass];
+            png_uint_32 i;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)(((row_info->width + 3) & 0x03) << 1);
+               dshift = (int)(((final_width + 3) & 0x03) << 1);
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+            else
+#endif
+            {
+               sshift = (int)((3 - ((row_info->width + 3) & 0x03)) << 1);
+               dshift = (int)((3 - ((final_width + 3) & 0x03)) << 1);
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0x03);   /* two-bit pixel */
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 1);
+            png_bytep dp = row + (png_size_t)((final_width - 1) >> 1);
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+            int jstop = png_pass_inc[pass];
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)(((row_info->width + 1) & 0x01) << 2);
+               dshift = (int)(((final_width + 1) & 0x01) << 2);
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            else
+#endif
+            {
+               sshift = (int)((1 - ((row_info->width + 1) & 0x01)) << 2);
+               dshift = (int)((1 - ((final_width + 1) & 0x01)) << 2);
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v = (png_byte)((*sp >> sshift) & 0xf);
+               int j;
+
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         default:   /* every depth other than 1, 2 or 4: copy whole bytes */
+         {
+            png_size_t pixel_bytes = (row_info->pixel_depth >> 3);
+            png_bytep sp = row + (png_size_t)(row_info->width - 1) * pixel_bytes;
+            png_bytep dp = row + (png_size_t)(final_width - 1) * pixel_bytes;
+
+            int jstop = png_pass_inc[pass];
+            png_uint_32 i;
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v[8];   /* one pixel; assumes pixel_bytes <= 8 (TODO confirm) */
+               int j;
+
+               png_memcpy(v, sp, pixel_bytes);
+               for (j = 0; j < jstop; j++)
+               {
+                  png_memcpy(dp, v, pixel_bytes);
+                  dp -= pixel_bytes;
+               }
+               sp -= pixel_bytes;
+            }
+            break;
+         }
+      }
+      row_info->width = final_width;   /* row_info now describes the wide row */
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
+   }
+#if !defined(PNG_READ_PACKSWAP_SUPPORTED)
+   transformations = transformations; /* silence compiler warning */
+#endif
+}
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+/* Undo row filter 'filter' on 'row' in place, reading prev_row where needed. */
+void /* PRIVATE */
+png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep row,
+   png_bytep prev_row, int filter)
+{
+   png_debug(1, "in png_read_filter_row\n");
+   png_debug2(2,"row = %lu, filter = %d\n", png_ptr->row_number, filter);
+   switch (filter)
+   {
+      case PNG_FILTER_VALUE_NONE:
+         break;
+      case PNG_FILTER_VALUE_SUB:
+      {
+         png_uint_32 i;
+         png_uint_32 istop = row_info->rowbytes;
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;   /* bytes/pixel */
+         png_bytep rp = row + bpp;   /* first bpp bytes are left unchanged */
+         png_bytep lp = row;         /* byte bpp positions to the left of rp */
+
+         for (i = bpp; i < istop; i++)
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_UP:
+      {
+         png_uint_32 i;
+         png_uint_32 istop = row_info->rowbytes;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;   /* same byte offset in prev_row */
+
+         for (i = 0; i < istop; i++)
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_AVG:
+      {
+         png_uint_32 i;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;
+         png_bytep lp = row;
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+         png_uint_32 istop = row_info->rowbytes - bpp;
+
+         for (i = 0; i < bpp; i++)   /* first pixel: only prev_row contributes */
+         {
+            *rp = (png_byte)(((int)(*rp) +
+               ((int)(*pp++) / 2 )) & 0xff);
+            rp++;
+         }
+
+         for (i = 0; i < istop; i++)   /* rest: mean of prev_row and left byte */
+         {
+            *rp = (png_byte)(((int)(*rp) +
+               (int)(*pp++ + *lp++) / 2 ) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_PAETH:
+      {
+         png_uint_32 i;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;
+         png_bytep lp = row;
+         png_bytep cp = prev_row;   /* trails pp by bpp bytes in the loop below */
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+         png_uint_32 istop=row_info->rowbytes - bpp;
+
+         for (i = 0; i < bpp; i++)   /* first pixel: add the prev_row byte only */
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+            rp++;
+         }
+
+         for (i = 0; i < istop; i++)   /* use leftover rp,pp */
+         {
+            int a, b, c, pa, pb, pc, p;
+
+            a = *lp++;   /* left */
+            b = *pp++;   /* prev_row, same offset */
+            c = *cp++;   /* prev_row, one pixel to the left */
+
+            p = b - c;
+            pc = a - c;
+
+#ifdef PNG_USE_ABS
+            pa = abs(p);
+            pb = abs(pc);
+            pc = abs(p + pc);
+#else
+            pa = p < 0 ? -p : p;
+            pb = pc < 0 ? -pc : pc;
+            pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+            /*
+               if (pa <= pb && pa <= pc)
+                  p = a;
+               else if (pb <= pc)
+                  p = b;
+               else
+                  p = c;
+             */
+
+            p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+            *rp = (png_byte)(((int)(*rp) + p) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      default:
+         png_warning(png_ptr, "Ignoring bad adaptive filter type");
+         *row=0;   /* NOTE(review): only the first byte of the row is zeroed */
+         break;
+   }
+}
+/* Finish a row: bump row_number, step interlace pass, or end the zlib stream. */
+void /* PRIVATE */
+png_read_finish_row(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+#endif
+
+   png_debug(1, "in png_read_finish_row\n");
+   png_ptr->row_number++;
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;   /* rows remain: nothing else to do */
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      png_memset_check(png_ptr, png_ptr->prev_row, 0,
+         png_ptr->rowbytes + 1);
+      do   /* step through passes, recomputing iwidth/irowbytes/num_rows */
+      {
+         png_ptr->pass++;
+         if (png_ptr->pass >= 7)
+            break;
+         png_ptr->iwidth = (png_ptr->width +
+            png_pass_inc[png_ptr->pass] - 1 -
+            png_pass_start[png_ptr->pass]) /
+            png_pass_inc[png_ptr->pass];
+
+         png_ptr->irowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,
+            png_ptr->iwidth) + 1;
+
+         if (!(png_ptr->transformations & PNG_INTERLACE))
+         {
+            png_ptr->num_rows = (png_ptr->height +
+               png_pass_yinc[png_ptr->pass] - 1 -
+               png_pass_ystart[png_ptr->pass]) /
+               png_pass_yinc[png_ptr->pass];
+            if (!(png_ptr->num_rows))
+               continue;   /* jumps to the while test below */
+         }
+         else  /* if (png_ptr->transformations & PNG_INTERLACE) */
+            break;
+      } while (png_ptr->iwidth == 0);
+
+      if (png_ptr->pass < 7)
+         return;   /* another pass follows */
+   }
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IDAT;
+#endif
+      char extra;   /* one-byte sink: output landing here is surplus data */
+      int ret;
+
+      png_ptr->zstream.next_out = (Byte *)&extra;
+      png_ptr->zstream.avail_out = (uInt)1;
+      for(;;)
+      {
+         if (!(png_ptr->zstream.avail_in))   /* refill zbuf from IDAT data */
+         {
+            while (!png_ptr->idat_size)
+            {
+               png_byte chunk_length[4];
+
+               png_crc_finish(png_ptr, 0);
+
+               png_read_data(png_ptr, chunk_length, 4);
+               png_ptr->idat_size = png_get_uint_31(png_ptr, chunk_length);
+               png_reset_crc(png_ptr);
+               png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+               if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+                  png_error(png_ptr, "Not enough image data");
+
+            }
+            png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
+            png_ptr->zstream.next_in = png_ptr->zbuf;
+            if (png_ptr->zbuf_size > png_ptr->idat_size)
+               png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
+            png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zstream.avail_in);
+            png_ptr->idat_size -= png_ptr->zstream.avail_in;
+         }
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret == Z_STREAM_END)
+         {
+            if (!(png_ptr->zstream.avail_out) || png_ptr->zstream.avail_in ||
+               png_ptr->idat_size)
+               png_warning(png_ptr, "Extra compressed data");
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+         if (ret != Z_OK)
+            png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
+                      "Decompression Error");
+
+         if (!(png_ptr->zstream.avail_out))   /* 'extra' was written to */
+         {
+            png_warning(png_ptr, "Extra compressed data.");
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+
+      }
+      png_ptr->zstream.avail_out = 0;
+   }
+
+   if (png_ptr->idat_size || png_ptr->zstream.avail_in)
+      png_warning(png_ptr, "Extra compression data");
+
+   inflateReset(&png_ptr->zstream);
+
+   png_ptr->mode |= PNG_AFTER_IDAT;
+}
+/* Prepare for row reads: set pass geometry and size row_buf and prev_row. */
+void /* PRIVATE */
+png_read_start_row(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+#endif
+
+   int max_pixel_depth;   /* widest pixel any enabled transformation yields */
+   png_uint_32 row_bytes;
+
+   png_debug(1, "in png_read_start_row\n");
+   png_ptr->zstream.avail_in = 0;
+   png_init_read_transformations(png_ptr);
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   if (png_ptr->interlaced)
+   {
+      if (!(png_ptr->transformations & PNG_INTERLACE))
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+            png_pass_ystart[0]) / png_pass_yinc[0];
+      else
+         png_ptr->num_rows = png_ptr->height;
+
+      png_ptr->iwidth = (png_ptr->width +
+         png_pass_inc[png_ptr->pass] - 1 -
+         png_pass_start[png_ptr->pass]) /
+         png_pass_inc[png_ptr->pass];
+
+         row_bytes = PNG_ROWBYTES(png_ptr->pixel_depth,png_ptr->iwidth) + 1;
+
+         png_ptr->irowbytes = (png_size_t)row_bytes;
+         if((png_uint_32)png_ptr->irowbytes != row_bytes)   /* lost in cast */
+            png_error(png_ptr, "Rowbytes overflow in png_read_start_row");
+   }
+   else
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->iwidth = png_ptr->width;
+      png_ptr->irowbytes = png_ptr->rowbytes + 1;
+   }
+   max_pixel_depth = png_ptr->pixel_depth;   /* raised by the checks below */
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACK) && png_ptr->bit_depth < 8)
+      max_pixel_depth = 8;
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (png_ptr->num_trans)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 24;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth < 8)
+            max_pixel_depth = 8;
+         if (png_ptr->num_trans)
+            max_pixel_depth *= 2;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (png_ptr->num_trans)
+         {
+            max_pixel_depth *= 4;   /* scale depth by 4/3 */
+            max_pixel_depth /= 3;
+         }
+      }
+   }
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & (PNG_FILLER))
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         max_pixel_depth = 32;
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth <= 8)
+            max_pixel_depth = 16;
+         else
+            max_pixel_depth = 32;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (max_pixel_depth <= 32)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 64;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+   {
+      if (
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+        (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND)) ||
+#endif
+#if defined(PNG_READ_FILLER_SUPPORTED)
+        (png_ptr->transformations & (PNG_FILLER)) ||
+#endif
+        png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (max_pixel_depth <= 16)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 64;
+      }
+      else
+      {
+         if (max_pixel_depth <= 8)
+           {
+             if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+               max_pixel_depth = 32;
+             else
+               max_pixel_depth = 24;
+           }
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            max_pixel_depth = 64;
+         else
+            max_pixel_depth = 48;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \
+defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if(png_ptr->transformations & PNG_USER_TRANSFORM)
+     {
+       int user_pixel_depth=png_ptr->user_transform_depth*
+         png_ptr->user_transform_channels;
+       if(user_pixel_depth > max_pixel_depth)
+         max_pixel_depth=user_pixel_depth;
+     }
+#endif
+
+   /* round the width up to a multiple of 8 pixels.  Mainly used
+      for interlacing */
+   row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7));
+   /* calculate the maximum bytes needed, adding a byte and a pixel
+      for safety's sake */
+   row_bytes = PNG_ROWBYTES(max_pixel_depth,row_bytes) +
+      1 + ((max_pixel_depth + 7) >> 3);
+#ifdef PNG_MAX_MALLOC_64K
+   if (row_bytes > (png_uint_32)65536L)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+#endif
+
+   if(row_bytes + 64 > png_ptr->old_big_row_buf_size)   /* grow only */
+   {
+     png_free(png_ptr,png_ptr->big_row_buf);
+     png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes+64);
+     png_ptr->row_buf = png_ptr->big_row_buf+32;   /* 32 bytes into the block */
+     png_ptr->old_big_row_buf_size = row_bytes+64;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if ((png_uint_32)png_ptr->rowbytes + 1 > (png_uint_32)65536L)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+#endif
+   if ((png_uint_32)png_ptr->rowbytes > (png_uint_32)(PNG_SIZE_MAX - 1))
+      png_error(png_ptr, "Row has too many bytes to allocate in memory.");
+
+   if(png_ptr->rowbytes+1 > png_ptr->old_prev_row_size)   /* grow only */
+   {
+     png_free(png_ptr,png_ptr->prev_row);
+     png_ptr->prev_row = (png_bytep)png_malloc(png_ptr, (png_uint_32)(
+        png_ptr->rowbytes + 1));
+     png_ptr->old_prev_row_size = png_ptr->rowbytes+1;
+   }
+
+   png_memset_check(png_ptr, png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+
+   png_debug1(3, "width = %lu,\n", png_ptr->width);
+   png_debug1(3, "height = %lu,\n", png_ptr->height);
+   png_debug1(3, "iwidth = %lu,\n", png_ptr->iwidth);
+   png_debug1(3, "num_rows = %lu\n", png_ptr->num_rows);
+   png_debug1(3, "rowbytes = %lu,\n", png_ptr->rowbytes);
+   png_debug1(3, "irowbytes = %lu,\n", png_ptr->irowbytes);
+
+   png_ptr->flags |= PNG_FLAG_ROW_INIT;
+}
+#endif /* PNG_READ_SUPPORTED */
diff --git a/PNG/pngset.c b/PNG/pngset.c
new file mode 100644
index 0000000..8b25ca5
--- /dev/null
+++ b/PNG/pngset.c
@@ -0,0 +1,1268 @@
+
+/* pngset.c - storage of image information into info struct
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * The functions here are used during reads to store data from the file
+ * into the info struct, and during writes to store application data
+ * into the info struct for writing into the file.  This abstracts the
+ * info struct and allows us to change the structure in the future.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+#if defined(PNG_bKGD_SUPPORTED)
+/* Copy *background into info_ptr->background and flag the bKGD data valid. */
+void PNGAPI
+png_set_bKGD(png_structp png_ptr, png_infop info_ptr, png_color_16p background)
+{
+   png_debug1(1, "in %s storage function\n", "bKGD");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   png_memcpy(&info_ptr->background, background, png_sizeof(png_color_16));
+   info_ptr->valid = info_ptr->valid | PNG_INFO_bKGD;
+}
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Validate the eight floating-point chromaticity values and, if they are
+ * acceptable, store them in info_ptr (plus the scaled fixed-point copies when
+ * PNG_FIXED_POINT_SUPPORTED is defined).  Rejected input only draws a warning.
+ */
+void PNGAPI
+png_set_cHRM(png_structp png_ptr, png_infop info_ptr,
+   double white_x, double white_y, double red_x, double red_y,
+   double green_x, double green_y, double blue_x, double blue_y)
+{
+   png_const_charp reject = NULL;
+
+   png_debug1(1, "in %s storage function\n", "cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* The tests run in this order; the first one that trips picks the text. */
+   if (white_x == 0 && white_y == 0 && red_x == 0 && red_y == 0 &&
+       green_x == 0 && green_y == 0 && blue_x == 0 && blue_y == 0)
+      reject = "Ignoring attempt to set all-zero chromaticity values";
+   else if (white_x < 0.0 || white_y < 0.0 || red_x < 0.0 || red_y < 0.0 ||
+       green_x < 0.0 || green_y < 0.0 || blue_x < 0.0 || blue_y < 0.0)
+      reject = "Ignoring attempt to set negative chromaticity value";
+   else if (white_x > 21474.83 || white_y > 21474.83 ||
+       red_x > 21474.83 || red_y > 21474.83 ||
+       green_x > 21474.83 || green_y > 21474.83 ||
+       blue_x > 21474.83 || blue_y > 21474.83)
+      reject = "Ignoring attempt to set chromaticity value exceeding 21474.83";
+
+   if (reject != NULL)
+   {
+      png_warning(png_ptr, reject);
+      return;
+   }
+
+   info_ptr->x_white = (float)white_x;
+   info_ptr->y_white = (float)white_y;
+   info_ptr->x_red   = (float)red_x;
+   info_ptr->y_red   = (float)red_y;
+   info_ptr->x_green = (float)green_x;
+   info_ptr->y_green = (float)green_y;
+   info_ptr->x_blue  = (float)blue_x;
+   info_ptr->y_blue  = (float)blue_y;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_x_white = (png_fixed_point)(white_x*100000.+0.5);
+   info_ptr->int_y_white = (png_fixed_point)(white_y*100000.+0.5);
+   info_ptr->int_x_red   = (png_fixed_point)(  red_x*100000.+0.5);
+   info_ptr->int_y_red   = (png_fixed_point)(  red_y*100000.+0.5);
+   info_ptr->int_x_green = (png_fixed_point)(green_x*100000.+0.5);
+   info_ptr->int_y_green = (png_fixed_point)(green_y*100000.+0.5);
+   info_ptr->int_x_blue  = (png_fixed_point)( blue_x*100000.+0.5);
+   info_ptr->int_y_blue  = (png_fixed_point)( blue_y*100000.+0.5);
+#endif
+   info_ptr->valid |= PNG_INFO_cHRM;
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Fixed-point counterpart of png_set_cHRM(): validate the eight values and
+ * store them in info_ptr; when PNG_FLOATING_POINT_SUPPORTED is defined the
+ * floating-point copies (value/100000) are stored as well.
+ */
+void PNGAPI
+png_set_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
+   png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
+   png_fixed_point blue_x, png_fixed_point blue_y)
+{
+   /* Largest value accepted for any single chromaticity component. */
+   const png_fixed_point limit = (png_fixed_point) PNG_UINT_31_MAX;
+   png_const_charp reject = NULL;
+
+   png_debug1(1, "in %s storage function\n", "cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* The tests run in this order; the first one that trips picks the text. */
+   if (white_x == 0 && white_y == 0 && red_x == 0 && red_y == 0 &&
+       green_x == 0 && green_y == 0 && blue_x == 0 && blue_y == 0)
+      reject = "Ignoring attempt to set all-zero chromaticity values";
+   else if (white_x < 0 || white_y < 0 || red_x < 0 || red_y < 0 ||
+       green_x < 0 || green_y < 0 || blue_x < 0 || blue_y < 0)
+      reject = "Ignoring attempt to set negative chromaticity value";
+   else if (white_x > limit || white_y > limit || red_x > limit ||
+       red_y > limit || green_x > limit || green_y > limit ||
+       blue_x > limit || blue_y > limit)
+      reject = "Ignoring attempt to set chromaticity value exceeding 21474.83";
+
+   if (reject != NULL)
+   {
+      png_warning(png_ptr, reject);
+      return;
+   }
+
+   /* Accepted: the fixed-point values are stored exactly as supplied ... */
+   info_ptr->int_x_white = white_x;
+   info_ptr->int_y_white = white_y;
+   info_ptr->int_x_red   = red_x;
+   info_ptr->int_y_red   = red_y;
+   info_ptr->int_x_green = green_x;
+   info_ptr->int_y_green = green_y;
+   info_ptr->int_x_blue  = blue_x;
+   info_ptr->int_y_blue  = blue_y;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   /* ... and the floating-point fields get the same values over 100000. */
+   info_ptr->x_white = (float)(white_x/100000.);
+   info_ptr->y_white = (float)(white_y/100000.);
+   info_ptr->x_red   = (float)(  red_x/100000.);
+   info_ptr->y_red   = (float)(  red_y/100000.);
+   info_ptr->x_green = (float)(green_x/100000.);
+   info_ptr->y_green = (float)(green_y/100000.);
+   info_ptr->x_blue  = (float)( blue_x/100000.);
+   info_ptr->y_blue  = (float)( blue_y/100000.);
+#endif
+
+   info_ptr->valid |= PNG_INFO_cHRM;
+}
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Store file_gamma in info_ptr, limited to at most 21474.83; a stored value
+ * of zero draws a warning as well. */
+void PNGAPI
+png_set_gAMA(png_structp png_ptr, png_infop info_ptr, double file_gamma)
+{
+   double png_gamma = file_gamma;
+   png_debug1(1, "in %s storage function\n", "gAMA");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* Check for overflow */
+   if (png_gamma > 21474.83)
+   {
+      png_warning(png_ptr, "Limiting gamma to 21474.83");
+      png_gamma = 21474.83;
+   }
+   info_ptr->gamma = (float)png_gamma;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_gamma = (int)(png_gamma*100000.+.5);
+#endif
+   info_ptr->valid |= PNG_INFO_gAMA;
+   if (png_gamma == 0.0)
+      png_warning(png_ptr, "Setting gamma=0");
+}
+#endif
+/* Fixed-point gamma setter: limit int_gamma to the range 0..PNG_UINT_31_MAX
+ * (warning whenever it had to be changed), store it in whichever
+ * representations are compiled in, and warn if the stored gamma is zero.
+ */
+void PNGAPI
+png_set_gAMA_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point int_gamma)
+{
+   png_fixed_point png_gamma = int_gamma;
+
+   png_debug1(1, "in %s storage function\n", "gAMA");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (int_gamma > (png_fixed_point) PNG_UINT_31_MAX)
+   {
+      png_warning(png_ptr, "Limiting gamma to 21474.83");
+      png_gamma = PNG_UINT_31_MAX;
+   }
+   else if (int_gamma < 0)
+   {
+      png_warning(png_ptr, "Setting negative gamma to zero");
+      png_gamma = 0;
+   }
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   info_ptr->gamma = (float)(png_gamma/100000.);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_gamma = png_gamma;
+#endif
+   info_ptr->valid |= PNG_INFO_gAMA;
+   if (png_gamma == 0)
+      png_warning(png_ptr, "Setting gamma=0");
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* Copy info_ptr->num_palette entries of hist into a newly allocated array of
+ * PNG_MAX_PALETTE_LENGTH entries referenced from both png_ptr and info_ptr. */
+void PNGAPI
+png_set_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_16p hist)
+{
+   png_uint_16p table;
+   int n;
+
+   png_debug1(1, "in %s storage function\n", "hIST");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (info_ptr->num_palette == 0 ||
+       info_ptr->num_palette > PNG_MAX_PALETTE_LENGTH)
+   {
+      png_warning(png_ptr, "Invalid palette size, hIST allocation skipped.");
+      return;
+   }
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0);
+#endif
+   /* Full-size allocation (not just num_palette entries) since version 1.2.1 */
+   table = (png_uint_16p)png_malloc_warn(png_ptr,
+      (png_uint_32)(PNG_MAX_PALETTE_LENGTH * png_sizeof (png_uint_16)));
+   png_ptr->hist = table;
+   if (table == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory for hIST chunk data.");
+      return;
+   }
+   for (n = 0; n < info_ptr->num_palette; n++)
+      table[n] = hist[n];
+   info_ptr->hist = table;
+   info_ptr->valid |= PNG_INFO_hIST;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_HIST;
+#else
+   png_ptr->flags |= PNG_FLAG_FREE_HIST;
+#endif
+}
+#endif
+/* Check the IHDR values, then store them and the derived sizes in info_ptr. */
+void PNGAPI
+png_set_IHDR(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 width, png_uint_32 height, int bit_depth,
+   int color_type, int interlace_type, int compression_type,
+   int filter_type)
+{
+   png_debug1(1, "in %s storage function\n", "IHDR");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* check for width and height valid values */
+   if (width == 0 || height == 0)
+      png_error(png_ptr, "Image width or height is zero in IHDR");
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (width > png_ptr->user_width_max || height > png_ptr->user_height_max)
+      png_error(png_ptr, "image size exceeds user limits in IHDR");
+#else
+   if (width > PNG_USER_WIDTH_MAX || height > PNG_USER_HEIGHT_MAX)
+      png_error(png_ptr, "image size exceeds user limits in IHDR");
+#endif
+   if (width > PNG_UINT_31_MAX || height > PNG_UINT_31_MAX)
+      png_error(png_ptr, "Invalid image size in IHDR");
+   if ( width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      png_warning(png_ptr, "Width is too large for libpng to process pixels");
+
+   /* check other values (note: the width test just above only warns) */
+   if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 &&
+      bit_depth != 8 && bit_depth != 16)
+      png_error(png_ptr, "Invalid bit depth in IHDR");
+
+   if (color_type < 0 || color_type == 1 ||
+      color_type == 5 || color_type > 6)
+      png_error(png_ptr, "Invalid color type in IHDR");
+
+   if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) ||
+       ((color_type == PNG_COLOR_TYPE_RGB ||
+         color_type == PNG_COLOR_TYPE_GRAY_ALPHA ||
+         color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8))
+      png_error(png_ptr, "Invalid color type/bit depth combination in IHDR");
+
+   if (interlace_type >= PNG_INTERLACE_LAST)
+      png_error(png_ptr, "Unknown interlace method in IHDR");
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+      png_error(png_ptr, "Unknown compression method in IHDR");
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   /* Accept filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not read a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE)&&png_ptr->mng_features_permitted)
+      png_warning(png_ptr,"MNG features are not allowed in a PNG datastream");
+   if(filter_type != PNG_FILTER_TYPE_BASE)
+   {
+     if(!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+        (filter_type == PNG_INTRAPIXEL_DIFFERENCING) &&
+        ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) &&
+        (color_type == PNG_COLOR_TYPE_RGB ||
+         color_type == PNG_COLOR_TYPE_RGB_ALPHA)))
+        png_error(png_ptr, "Unknown filter method in IHDR");
+     if(png_ptr->mode&PNG_HAVE_PNG_SIGNATURE)
+        png_warning(png_ptr, "Invalid filter method in IHDR");
+   }
+#else
+   if(filter_type != PNG_FILTER_TYPE_BASE)
+      png_error(png_ptr, "Unknown filter method in IHDR");
+#endif
+
+   /* store the header values, each narrowed to the width of its field */
+   info_ptr->width = width;
+   info_ptr->height = height;
+   info_ptr->bit_depth = (png_byte)bit_depth;
+   info_ptr->color_type =(png_byte) color_type;
+   info_ptr->compression_type = (png_byte)compression_type;
+   info_ptr->filter_type = (png_byte)filter_type;
+   info_ptr->interlace_type = (png_byte)interlace_type;
+   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      info_ptr->channels = 1;   /* palette image */
+   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      info_ptr->channels = 3;   /* colour bit set */
+   else
+      info_ptr->channels = 1;   /* no colour bit set */
+   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
+      info_ptr->channels++;     /* alpha adds one channel */
+   info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
+
+   /* check for potential overflow: same test as the warning above */
+   if (width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      info_ptr->rowbytes = (png_size_t)0;
+   else
+      info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth,width);
+}
+
+#if defined(PNG_oFFs_SUPPORTED)
+/* Record the caller's oFFs values in info_ptr and flag them as valid. */
+void PNGAPI
+png_set_oFFs(png_structp png_ptr, png_infop info_ptr,
+   png_int_32 offset_x, png_int_32 offset_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function\n", "oFFs");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   info_ptr->valid |= PNG_INFO_oFFs;
+   info_ptr->offset_unit_type = (png_byte)unit_type;
+   info_ptr->y_offset = offset_y;
+   info_ptr->x_offset = offset_x;
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* Store private copies of the pCAL strings and values in info_ptr; a failed
+ * allocation warns and returns early, leaving PNG_INFO_pCAL unset. */
+void PNGAPI
+png_set_pCAL(png_structp png_ptr, png_infop info_ptr,
+   png_charp purpose, png_int_32 X0, png_int_32 X1, int type, int nparams,
+   png_charp units, png_charpp params)
+{
+   png_uint_32 length;
+   int i;
+
+   png_debug1(1, "in %s storage function\n", "pCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   length = png_strlen(purpose) + 1;
+   png_debug1(3, "allocating purpose for info (%lu bytes)\n", length);
+   info_ptr->pcal_purpose = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->pcal_purpose == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL purpose.");
+       return;
+     }
+   png_memcpy(info_ptr->pcal_purpose, purpose, (png_size_t)length);
+
+   png_debug(3, "storing X0, X1, type, and nparams in info\n");
+   info_ptr->pcal_X0 = X0;
+   info_ptr->pcal_X1 = X1;
+   info_ptr->pcal_type = (png_byte)type;
+   info_ptr->pcal_nparams = (png_byte)nparams;
+
+   length = png_strlen(units) + 1;
+   png_debug1(3, "allocating units for info (%lu bytes)\n", length);
+   info_ptr->pcal_units = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->pcal_units == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL units.");
+       return;
+     }
+   png_memcpy(info_ptr->pcal_units, units, (png_size_t)length);
+
+   info_ptr->pcal_params = (png_charpp)png_malloc_warn(png_ptr,
+      (png_uint_32)((nparams + 1) * png_sizeof(png_charp)));
+   if (info_ptr->pcal_params == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL params.");
+       return;
+     }
+   /* Zero every slot so an early return below never leaves wild pointers. */
+   png_memset(info_ptr->pcal_params, 0, (nparams + 1) * png_sizeof(png_charp));
+   for (i = 0; i < nparams; i++)
+   {
+      length = png_strlen(params[i]) + 1;
+      png_debug2(3, "allocating parameter %d for info (%lu bytes)\n", i, length);
+      info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length);
+      if (info_ptr->pcal_params[i] == NULL)
+        {
+          png_warning(png_ptr, "Insufficient memory for pCAL parameter.");
+          return;
+        }
+      png_memcpy(info_ptr->pcal_params[i], params[i], (png_size_t)length);
+   }
+   info_ptr->valid |= PNG_INFO_pCAL;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_PCAL;
+#endif
+}
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Store the sCAL unit and the pixel width/height given as doubles. */
+void PNGAPI
+png_set_sCAL(png_structp png_ptr, png_infop info_ptr,
+             int unit, double width, double height)
+{
+   png_debug1(1, "in %s storage function\n", "sCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->scal_pixel_height = height;
+   info_ptr->scal_pixel_width = width;
+   info_ptr->scal_unit = (png_byte)unit;
+   info_ptr->valid |= PNG_INFO_sCAL;
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Store the sCAL unit and private copies of the width and height strings. */
+void PNGAPI
+png_set_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int unit, png_charp swidth, png_charp sheight)
+{
+   png_uint_32 length;
+
+   png_debug1(1, "in %s storage function\n", "sCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->scal_unit = (png_byte)unit;
+
+   length = png_strlen(swidth) + 1;
+   png_debug1(3, "allocating unit for info (%lu bytes)\n", length);
+   info_ptr->scal_s_width = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->scal_s_width == NULL)
+   {
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      return;
+   }
+   png_memcpy(info_ptr->scal_s_width, swidth, (png_size_t)length);
+
+   length = png_strlen(sheight) + 1;
+   png_debug1(3, "allocating unit for info (%lu bytes)\n", length);
+   info_ptr->scal_s_height = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->scal_s_height == NULL)
+   {
+      png_free (png_ptr, info_ptr->scal_s_width);
+      info_ptr->scal_s_width = NULL;   /* do not leave a dangling pointer */
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      return;
+   }
+   png_memcpy(info_ptr->scal_s_height, sheight, (png_size_t)length);
+   info_ptr->valid |= PNG_INFO_sCAL;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_SCAL;
+#endif
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+/* Record the caller's pHYs resolution values and unit type in info_ptr. */
+void PNGAPI
+png_set_pHYs(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 res_x, png_uint_32 res_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function\n", "pHYs");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   info_ptr->valid |= PNG_INFO_pHYs;
+   info_ptr->phys_unit_type = (png_byte)unit_type;
+   info_ptr->y_pixels_per_unit = res_y;
+   info_ptr->x_pixels_per_unit = res_x;
+}
+#endif
+
+/* Install a copy of the caller's palette: a PNG_MAX_PALETTE_LENGTH-entry
+ * table is allocated, zeroed and filled with the first num_palette entries. */
+void PNGAPI
+png_set_PLTE(png_structp png_ptr, png_infop info_ptr,
+   png_colorp palette, int num_palette)
+{
+   png_size_t table_bytes = PNG_MAX_PALETTE_LENGTH * png_sizeof(png_color);
+
+   png_debug1(1, "in %s storage function\n", "PLTE");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (num_palette < 0 || num_palette > PNG_MAX_PALETTE_LENGTH)
+     {
+       /* Only a palette-type image turns a bad length into an error. */
+       if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+       {
+         png_warning(png_ptr, "Invalid palette length");
+         return;
+       }
+       png_error(png_ptr, "Invalid palette length");
+     }
+
+   /*
+    * It may not actually be necessary to set png_ptr->palette here;
+    * we do it for backward compatibility with the way the png_handle_tRNS
+    * function used to do the allocation.
+    */
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0);
+#endif
+
+   /* Since libpng-1.2.1 the table always has PNG_MAX_PALETTE_LENGTH entries,
+      in case of an invalid PNG file that has too-large sample values. */
+   png_ptr->palette = (png_colorp)png_malloc(png_ptr, table_bytes);
+   png_memset(png_ptr->palette, 0, table_bytes);
+   png_memcpy(png_ptr->palette, palette, num_palette * png_sizeof (png_color));
+   info_ptr->palette = png_ptr->palette;
+   png_ptr->num_palette = (png_uint_16)num_palette;
+   info_ptr->num_palette = png_ptr->num_palette;
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_PLTE;
+#else
+   png_ptr->flags |= PNG_FLAG_FREE_PLTE;
+#endif
+   info_ptr->valid |= PNG_INFO_PLTE;
+}
+
+#if defined(PNG_sBIT_SUPPORTED)
+/* Copy *sig_bit into info_ptr->sig_bit and flag the sBIT data as valid. */
+void PNGAPI
+png_set_sBIT(png_structp png_ptr, png_infop info_ptr, png_color_8p sig_bit)
+{
+   png_debug1(1, "in %s storage function\n", "sBIT");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_memcpy(&info_ptr->sig_bit, sig_bit, png_sizeof (png_color_8));
+   info_ptr->valid = info_ptr->valid | PNG_INFO_sBIT;
+}
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+/* Record the sRGB intent in info_ptr and flag the sRGB data as valid. */
+void PNGAPI
+png_set_sRGB(png_structp png_ptr, png_infop info_ptr, int intent)
+{
+   png_debug1(1, "in %s storage function\n", "sRGB");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   info_ptr->valid |= PNG_INFO_sRGB;
+   info_ptr->srgb_intent = (png_byte)intent;
+}
+
+/* Set the sRGB intent and then, through every setter that is compiled in,
+ * the fixed gamma (.45455) and the fixed chromaticity values that this
+ * function pairs with it.
+ */
+void PNGAPI
+png_set_sRGB_gAMA_and_cHRM(png_structp png_ptr, png_infop info_ptr,
+   int intent)
+{
+   png_debug1(1, "in %s storage function\n", "sRGB_gAMA_and_cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_set_sRGB(png_ptr, info_ptr, intent);
+
+   /* Gamma: floating-point setter first, then the fixed-point one. */
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   {
+      float file_gamma = (float).45455;
+
+      png_set_gAMA(png_ptr, info_ptr, file_gamma);
+   }
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   {
+      png_fixed_point int_file_gamma = 45455L;
+
+      png_set_gAMA_fixed(png_ptr, info_ptr, int_file_gamma);
+   }
+#endif
+#endif
+
+   /* Chromaticities: fixed-point setter first, then the floating-point one. */
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   {
+      /* The floating-point values used further down, times 100000. */
+      png_fixed_point int_white_x = 31270L;
+      png_fixed_point int_white_y = 32900L;
+      png_fixed_point int_red_x   = 64000L;
+      png_fixed_point int_red_y   = 33000L;
+      png_fixed_point int_green_x = 30000L;
+      png_fixed_point int_green_y = 60000L;
+      png_fixed_point int_blue_x  = 15000L;
+      png_fixed_point int_blue_y  =  6000L;
+
+      png_set_cHRM_fixed(png_ptr, info_ptr,
+         int_white_x, int_white_y, int_red_x, int_red_y,
+         int_green_x, int_green_y, int_blue_x, int_blue_y);
+   }
+#endif
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   {
+      float white_x = (float).3127;
+      float white_y = (float).3290;
+      float red_x   = (float).64;
+      float red_y   = (float).33;
+      float green_x = (float).30;
+      float green_y = (float).60;
+      float blue_x  = (float).15;
+      float blue_y  = (float).06;
+
+      png_set_cHRM(png_ptr, info_ptr,
+         white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
+   }
+#endif
+#endif
+}
+#endif
+
+
+#if defined(PNG_iCCP_SUPPORTED)
+/* Replace the stored iCCP data with private copies of name and profile; if
+ * either allocation fails, warn and leave info_ptr's existing data alone. */
+void PNGAPI
+png_set_iCCP(png_structp png_ptr, png_infop info_ptr,
+             png_charp name, int compression_type,
+             png_charp profile, png_uint_32 proflen)
+{
+   png_charp name_copy, profile_copy;
+   png_uint_32 name_bytes;
+
+   png_debug1(1, "in %s storage function\n", "iCCP");
+   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL)
+      return;
+
+   name_bytes = png_strlen(name)+1;
+   name_copy = (png_charp)png_malloc_warn(png_ptr, name_bytes);
+   if (name_copy == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory to process iCCP chunk.");
+      return;
+   }
+   png_memcpy(name_copy, name, name_bytes);
+   profile_copy = (png_charp)png_malloc_warn(png_ptr, proflen);
+   if (profile_copy == NULL)
+   {
+      png_free (png_ptr, name_copy);
+      png_warning(png_ptr, "Insufficient memory to process iCCP profile.");
+      return;
+   }
+   png_memcpy(profile_copy, profile, (png_size_t)proflen);
+   /* Both copies exist, so the previously stored data can be released. */
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0);
+   info_ptr->iccp_name = name_copy;
+   info_ptr->iccp_profile = profile_copy;
+   info_ptr->iccp_proflen = proflen;
+   /* Compression is always zero but is here so the API and info structure
+    * does not have to change if we introduce multiple compression types */
+   info_ptr->iccp_compression = (png_byte)compression_type;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_ICCP;
+#endif
+   info_ptr->valid |= PNG_INFO_iCCP;
+}
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* Public entry point: forwards to png_set_text_2() and raises png_error()
+ * when that reports a failure (non-zero return). */
+void PNGAPI
+png_set_text(png_structp png_ptr, png_infop info_ptr, png_textp text_ptr,
+   int num_text)
+{
+   if (png_set_text_2(png_ptr, info_ptr, text_ptr, num_text) != 0)
+      png_error(png_ptr, "Insufficient memory to store text");
+}
+
+/* Append num_text entries from text_ptr to info_ptr->text, growing the array
+ * as needed.  Returns 0 when done or nothing to do, 1 if an allocation fails. */
+int /* PRIVATE */
+png_set_text_2(png_structp png_ptr, png_infop info_ptr, png_textp text_ptr,
+   int num_text)
+{
+   int i;
+
+   /* Test png_ptr first: the debug call below dereferences it. */
+   if (png_ptr == NULL || info_ptr == NULL || num_text == 0)
+      return(0);
+   png_debug1(1, "in %s storage function\n", (png_ptr->chunk_name[0] == '\0' ?
+      "text" : (png_const_charp)png_ptr->chunk_name));
+
+   /* Make sure we have enough space in the "text" array in info_struct
+    * to hold all of the incoming text_ptr objects.
+    */
+   if (info_ptr->num_text + num_text > info_ptr->max_text)
+   {
+      if (info_ptr->text != NULL)
+      {
+         png_textp old_text = info_ptr->text;
+         int old_max = info_ptr->max_text;
+
+         info_ptr->max_text = info_ptr->num_text + num_text + 8;
+         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
+            (png_uint_32)(info_ptr->max_text * png_sizeof (png_text)));
+         if (info_ptr->text == NULL)
+           {
+             info_ptr->text = old_text;   /* it still owns the entries */
+             info_ptr->max_text = old_max;
+             return(1);
+           }
+         png_memcpy(info_ptr->text, old_text, (png_size_t)(old_max *
+            png_sizeof(png_text)));
+         png_free(png_ptr, old_text);
+      }
+      else
+      {
+         info_ptr->max_text = num_text + 8;
+         info_ptr->num_text = 0;
+         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
+            (png_uint_32)(info_ptr->max_text * png_sizeof (png_text)));
+         if (info_ptr->text == NULL)
+           {
+             info_ptr->max_text = 0;      /* no array, so no capacity */
+             return(1);
+           }
+#ifdef PNG_FREE_ME_SUPPORTED
+         info_ptr->free_me |= PNG_FREE_TEXT;
+#endif
+      }
+      png_debug1(3, "allocated %d entries for info_ptr->text\n",
+         info_ptr->max_text);
+   }
+   for (i = 0; i < num_text; i++)
+   {
+      png_size_t text_length,key_len;
+      png_size_t lang_len,lang_key_len;
+      png_textp textp = &(info_ptr->text[info_ptr->num_text]);
+
+      if (text_ptr[i].key == NULL)
+          continue;
+
+      key_len = png_strlen(text_ptr[i].key);
+      if(text_ptr[i].compression <= 0)
+      {
+        lang_len = 0;
+        lang_key_len = 0;
+      }
+      else
+#ifdef PNG_iTXt_SUPPORTED
+      {
+        /* set iTXt data */
+        if (text_ptr[i].lang != NULL)
+          lang_len = png_strlen(text_ptr[i].lang);
+        else
+          lang_len = 0;
+        if (text_ptr[i].lang_key != NULL)
+          lang_key_len = png_strlen(text_ptr[i].lang_key);
+        else
+          lang_key_len = 0;
+      }
+#else
+      {
+        png_warning(png_ptr, "iTXt chunk not supported.");
+        continue;
+      }
+#endif
+
+      if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0')
+      {
+         text_length = 0;
+#ifdef PNG_iTXt_SUPPORTED
+         if(text_ptr[i].compression > 0)
+            textp->compression = PNG_ITXT_COMPRESSION_NONE;
+         else
+#endif
+            textp->compression = PNG_TEXT_COMPRESSION_NONE;
+      }
+      else
+      {
+         text_length = png_strlen(text_ptr[i].text);
+         textp->compression = text_ptr[i].compression;
+      }
+
+      textp->key = (png_charp)png_malloc_warn(png_ptr,
+         (png_uint_32)(key_len + text_length + lang_len + lang_key_len + 4));
+      if (textp->key == NULL)
+        return(1);
+      png_debug2(2, "Allocated %lu bytes at %x in png_set_text\n",
+         (png_uint_32)(key_len + lang_len + lang_key_len + text_length + 4),
+         (int)textp->key);
+      png_memcpy(textp->key, text_ptr[i].key, (png_size_t)(key_len));
+      *(textp->key+key_len) = '\0';
+#ifdef PNG_iTXt_SUPPORTED
+      if (text_ptr[i].compression > 0)
+      {
+         textp->lang=textp->key + key_len + 1;
+         png_memcpy(textp->lang, text_ptr[i].lang, lang_len);
+         *(textp->lang+lang_len) = '\0';
+         textp->lang_key=textp->lang + lang_len + 1;
+         png_memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
+         *(textp->lang_key+lang_key_len) = '\0';
+         textp->text=textp->lang_key + lang_key_len + 1;
+      }
+      else
+#endif
+      {
+#ifdef PNG_iTXt_SUPPORTED
+         textp->lang=NULL;
+         textp->lang_key=NULL;
+#endif
+         textp->text=textp->key + key_len + 1;
+      }
+      if(text_length)
+         png_memcpy(textp->text, text_ptr[i].text, (png_size_t)(text_length));
+      *(textp->text+text_length) = '\0';
+
+#ifdef PNG_iTXt_SUPPORTED
+      if(textp->compression > 0)
+      {
+         textp->text_length = 0;
+         textp->itxt_length = text_length;
+      }
+      else
+#endif
+      {
+         textp->text_length = text_length;
+#ifdef PNG_iTXt_SUPPORTED
+         textp->itxt_length = 0;
+#endif
+      }
+      info_ptr->num_text++;
+      png_debug1(3, "transferred text chunk %d\n", info_ptr->num_text);
+   }
+   return(0);
+}
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+/* Copy *mod_time into info_ptr unless png_ptr->mode has PNG_WROTE_tIME set. */
+void PNGAPI
+png_set_tIME(png_structp png_ptr, png_infop info_ptr, png_timep mod_time)
+{
+   png_debug1(1, "in %s storage function\n", "tIME");
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (png_ptr->mode & PNG_WROTE_tIME) != 0)
+      return;
+   png_memcpy(&info_ptr->mod_time, mod_time, png_sizeof (png_time));
+   info_ptr->valid = info_ptr->valid | PNG_INFO_tIME;
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* Store a copy of trans and/or trans_values.  A num_trans that would not fit
+ * the PNG_MAX_PALETTE_LENGTH-byte trans copy is rejected with a warning. */
+void PNGAPI
+png_set_tRNS(png_structp png_ptr, png_infop info_ptr,
+   png_bytep trans, int num_trans, png_color_16p trans_values)
+{
+   png_debug1(1, "in %s storage function\n", "tRNS");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (num_trans < 0 || num_trans > PNG_MAX_PALETTE_LENGTH)
+   {
+      png_warning(png_ptr, "Ignoring invalid num_trans value");
+      return;
+   }
+   png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0);
+
+   if (trans != NULL)
+   {
+       /* Allocated at full size since 1.2.1; png_ptr->trans kept for compat. */
+       png_ptr->trans = info_ptr->trans = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)PNG_MAX_PALETTE_LENGTH);
+       if (num_trans > 0)
+         png_memcpy(info_ptr->trans, trans, (png_size_t)num_trans);
+   }
+
+   if (trans_values != NULL)
+   {
+      int sample_max = (1 << info_ptr->bit_depth) - 1; /* largest valid sample */
+      if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY &&
+          (int)trans_values->gray > sample_max) ||
+          (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
+          ((int)trans_values->red > sample_max ||
+          (int)trans_values->green > sample_max ||
+          (int)trans_values->blue > sample_max)))
+        png_warning(png_ptr,
+           "tRNS chunk has out-of-range samples for bit_depth");
+      png_memcpy(&(info_ptr->trans_values), trans_values,
+         png_sizeof(png_color_16));
+      if (num_trans == 0)
+        num_trans = 1;
+   }
+
+   info_ptr->num_trans = (png_uint_16)num_trans;
+   if (num_trans != 0)
+   {
+      info_ptr->valid |= PNG_INFO_tRNS;
+#ifdef PNG_FREE_ME_SUPPORTED
+      info_ptr->free_me |= PNG_FREE_TRNS;
+#else
+      png_ptr->flags |= PNG_FLAG_FREE_TRNS;
+#endif
+   }
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* Append nentries sPLT palettes from entries to info_ptr->splt_palettes.
+ * An entry whose name or table cannot be allocated is left zeroed. */
+void PNGAPI
+png_set_sPLT(png_structp png_ptr,
+             png_infop info_ptr, png_sPLT_tp entries, int nentries)
+{
+    png_sPLT_tp np;
+    int i;
+
+    if (png_ptr == NULL || info_ptr == NULL)
+       return;
+
+    np = (png_sPLT_tp)png_malloc_warn(png_ptr,
+        (info_ptr->splt_palettes_num + nentries) * png_sizeof(png_sPLT_t));
+    if (np == NULL)
+    {
+      png_warning(png_ptr, "No memory for sPLT palettes.");
+      return;
+    }
+    png_memcpy(np, info_ptr->splt_palettes,
+           info_ptr->splt_palettes_num * png_sizeof(png_sPLT_t));
+    png_free(png_ptr, info_ptr->splt_palettes);
+    info_ptr->splt_palettes=NULL;
+    /* Zero the new slots so a skipped one never holds uninitialized pointers */
+    png_memset(np + info_ptr->splt_palettes_num, 0,
+           nentries * png_sizeof(png_sPLT_t));
+    for (i = 0; i < nentries; i++)
+    {
+        png_sPLT_tp to = np + info_ptr->splt_palettes_num + i;
+        png_sPLT_tp from = entries + i;
+        png_uint_32 length;
+        length = png_strlen(from->name) + 1;
+        to->name = (png_charp)png_malloc_warn(png_ptr, length);
+        if (to->name == NULL)
+        {
+           png_warning(png_ptr, "Out of memory while processing sPLT chunk");
+           continue;
+        }
+        png_memcpy(to->name, from->name, length);
+        to->entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
+            from->nentries * png_sizeof(png_sPLT_entry));
+        if (to->entries == NULL)
+        {
+           png_warning(png_ptr, "Out of memory while processing sPLT chunk");
+           png_free(png_ptr,to->name);
+           to->name = NULL;
+           continue;
+        }
+        png_memcpy(to->entries, from->entries,
+            from->nentries * png_sizeof(png_sPLT_entry));
+        to->nentries = from->nentries;
+        to->depth = from->depth;
+    }
+
+    info_ptr->splt_palettes = np;
+    info_ptr->splt_palettes_num += nentries;
+    info_ptr->valid |= PNG_INFO_sPLT;
+#ifdef PNG_FREE_ME_SUPPORTED
+    info_ptr->free_me |= PNG_FREE_SPLT;
+#endif
+}
+#endif /* PNG_sPLT_SUPPORTED */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* Append copies of the 'num_unknowns' chunks in 'unknowns' to info_ptr,
+ * stamping each with the current png_ptr->mode as its location. */
+void PNGAPI
+png_set_unknown_chunks(png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns)
+{
+    png_unknown_chunkp np;
+    int i;
+
+    if (png_ptr == NULL || info_ptr == NULL ||
+        unknowns == NULL || num_unknowns <= 0)
+        return;
+
+    np = (png_unknown_chunkp)png_malloc_warn(png_ptr,
+        (info_ptr->unknown_chunks_num + num_unknowns) *
+        png_sizeof(png_unknown_chunk));
+    if (np == NULL)
+    {
+       png_warning(png_ptr, "Out of memory while processing unknown chunk.");
+       return;
+    }
+
+    png_memcpy(np, info_ptr->unknown_chunks,
+           info_ptr->unknown_chunks_num * png_sizeof(png_unknown_chunk));
+    png_free(png_ptr, info_ptr->unknown_chunks);
+    info_ptr->unknown_chunks=NULL;
+
+    for (i = 0; i < num_unknowns; i++)
+    {
+       png_unknown_chunkp to = np + info_ptr->unknown_chunks_num + i;
+       png_unknown_chunkp from = unknowns + i;
+
+       png_memcpy((png_charp)to->name, (png_charp)from->name,
+                  png_sizeof(from->name));
+       to->name[png_sizeof(to->name)-1] = '\0';
+       to->size = from->size;
+       /* note our location in the read or write sequence */
+       to->location = (png_byte)(png_ptr->mode & 0xff);
+
+       to->data = NULL;
+       if (from->size != 0)
+       {
+          to->data = (png_bytep)png_malloc_warn(png_ptr, from->size);
+          if (to->data == NULL)
+          {
+             png_warning(png_ptr,
+              "Out of memory while processing unknown chunk.");
+             to->size=0;
+          }
+          else
+             png_memcpy(to->data, from->data, from->size);
+       }
+    }
+
+    info_ptr->unknown_chunks = np;
+    info_ptr->unknown_chunks_num += num_unknowns;
+#ifdef PNG_FREE_ME_SUPPORTED
+    info_ptr->free_me |= PNG_FREE_UNKN;
+#endif
+}
+void PNGAPI
+png_set_unknown_chunk_location(png_structp png_ptr, png_infop info_ptr,
+   int chunk, int location)
+{  /* Overwrite the recorded location of stored unknown chunk 'chunk'. */
+   if (png_ptr == NULL || info_ptr == NULL) return;
+   if (chunk < 0 || chunk >= (int)info_ptr->unknown_chunks_num) return;
+   info_ptr->unknown_chunks[chunk].location = (png_byte)location;
+}
+#endif
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+#if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+void PNGAPI
+png_permit_empty_plte (png_structp png_ptr, int empty_plte_permitted)
+{
+   /* Deprecated in favor of png_permit_mng_features(); will be removed
+      from libpng-1.3.0.  Copies only the PNG_FLAG_MNG_EMPTY_PLTE bit of
+      empty_plte_permitted into png_ptr->mng_features_permitted. */
+   png_debug(1, "in png_permit_empty_plte, DEPRECATED.\n");
+   if (png_ptr != NULL)
+      png_ptr->mng_features_permitted = (png_byte)
+        ((png_ptr->mng_features_permitted & (~PNG_FLAG_MNG_EMPTY_PLTE)) |
+        (empty_plte_permitted & PNG_FLAG_MNG_EMPTY_PLTE));
+}
+#endif
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+png_uint_32 PNGAPI
+png_permit_mng_features (png_structp png_ptr, png_uint_32 mng_features)
+{
+   /* Record mng_features masked by PNG_ALL_MNG_FEATURES; return the result. */
+   png_uint_32 masked = mng_features & PNG_ALL_MNG_FEATURES;
+   png_debug(1, "in png_permit_mng_features\n");
+   if (png_ptr == NULL) return (png_uint_32)0;
+   png_ptr->mng_features_permitted = (png_byte)masked;
+   return (png_uint_32)png_ptr->mng_features_permitted;
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+void PNGAPI
+png_set_keep_unknown_chunks(png_structp png_ptr, int keep, png_bytep
+   chunk_list, int num_chunks)
+{
+    png_bytep grown, added;
+    int idx, prev_count;
+
+    if (png_ptr == NULL)
+       return;
+    if (num_chunks == 0)
+    {
+      /* No list given: 'keep' only selects the two KEEP_* flag bits. */
+      png_ptr->flags &= ~PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
+      png_ptr->flags &= ~PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+      if (keep == PNG_HANDLE_CHUNK_ALWAYS)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS |
+                          PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+      else if (keep == PNG_HANDLE_CHUNK_IF_SAFE)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
+      return;
+    }
+    if (chunk_list == NULL)
+      return;
+    /* Entries are 5 bytes each; byte 4 of each added entry becomes 'keep'. */
+    prev_count = png_ptr->num_chunk_list;
+    grown = (png_bytep)png_malloc(png_ptr,
+       (png_uint_32)(5*(num_chunks+prev_count)));
+    if (png_ptr->chunk_list != NULL)
+    {
+       png_memcpy(grown, png_ptr->chunk_list, (png_size_t)(5*prev_count));
+       png_free(png_ptr, png_ptr->chunk_list);
+       png_ptr->chunk_list=NULL;
+    }
+    added = grown + 5*prev_count;
+    png_memcpy(added, chunk_list, (png_size_t)(5*num_chunks));
+    for (idx = 0; idx < num_chunks; idx++)
+       added[5*idx + 4] = (png_byte)keep;
+    png_ptr->num_chunk_list = prev_count + num_chunks;
+    png_ptr->chunk_list = grown;
+#ifdef PNG_FREE_ME_SUPPORTED
+    png_ptr->free_me |= PNG_FREE_LIST;
+#endif
+}
+#endif
+
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+void PNGAPI
+png_set_read_user_chunk_fn(png_structp png_ptr, png_voidp user_chunk_ptr,
+   png_user_chunk_ptr read_user_chunk_fn)
+{
+   /* Store the callback and the accompanying user pointer in png_ptr. */
+   png_debug(1, "in png_set_read_user_chunk_fn\n");
+   if (png_ptr == NULL) return;
+   png_ptr->user_chunk_ptr = user_chunk_ptr;
+   png_ptr->read_user_chunk_fn = read_user_chunk_fn;
+}
+#endif
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+void PNGAPI
+png_set_rows(png_structp png_ptr, png_infop info_ptr, png_bytepp row_pointers)
+{
+   png_debug1(1, "in %s storage function\n", "rows");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* A previously stored, different row array goes through png_free_data. */
+   if (info_ptr->row_pointers != NULL && info_ptr->row_pointers != row_pointers)
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+   info_ptr->row_pointers = row_pointers;
+   if (row_pointers != NULL) info_ptr->valid |= PNG_INFO_IDAT;
+}
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+void PNGAPI
+png_set_compression_buffer_size(png_structp png_ptr, png_uint_32 size)
+{   /* Replace png_ptr->zbuf with a newly allocated buffer of 'size' bytes. */
+    if (png_ptr == NULL) return;
+    png_free(png_ptr, png_ptr->zbuf);
+    png_ptr->zbuf = NULL;  /* no dangling pointer if png_malloc errors out */
+    png_ptr->zbuf_size = (png_size_t)size;
+    png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size);
+    png_ptr->zstream.next_out = png_ptr->zbuf;
+    png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+}
+#endif
+
+void PNGAPI
+png_set_invalid(png_structp png_ptr, png_infop info_ptr, int mask)
+{  /* Clear the 'mask' bits of info_ptr->valid; no-op on a NULL argument. */
+   if (png_ptr == NULL || info_ptr == NULL) return;
+   info_ptr->valid &= ~mask;
+}
+
+
+#ifndef PNG_1_0_X
+#ifdef PNG_ASSEMBLER_CODE_SUPPORTED
+/* function was added to libpng 1.2.0 and should always exist by default */
+void PNGAPI
+png_set_asm_flags (png_structp png_ptr, png_uint_32 asm_flags)
+{
+    /* Obsolete as of 1.2.20, to be removed in 1.4.0: always clears asm_flags. */
+    if (png_ptr == NULL) return;
+    png_ptr->asm_flags = 0;
+}
+
+/* this function was added to libpng 1.2.0 */
+void PNGAPI
+png_set_mmx_thresholds (png_structp png_ptr,
+                        png_byte mmx_bitdepth_threshold,
+                        png_uint_32 mmx_rowbytes_threshold)
+{
+/* Obsolete as of libpng-1.2.20 and will be removed from libpng-1.4.0 */
+    if (png_ptr == NULL)
+       return;  /* either way nothing is stored: both thresholds are ignored */
+}
+#endif /* ?PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* this function was added to libpng 1.2.6 */
+void PNGAPI
+png_set_user_limits (png_structp png_ptr, png_uint_32 user_width_max,
+    png_uint_32 user_height_max)
+{
+    /* Images with dimensions larger than these limits will be
+     * rejected by png_set_IHDR().  To accept any PNG datastream
+     * regardless of dimensions, set both limits to 0x7fffffffL.
+     */
+    if(png_ptr == NULL) return;  /* nothing to store the limits in */
+    png_ptr->user_width_max = user_width_max;
+    png_ptr->user_height_max = user_height_max;
+}
+#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
+
+#endif /* ?PNG_1_0_X */
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngtest.c b/PNG/pngtest.c
new file mode 100644
index 0000000..60980c2
--- /dev/null
+++ b/PNG/pngtest.c
@@ -0,0 +1,1563 @@
+
+/* pngtest.c - a simple test program to test libpng
+ *
+ * Last changed in libpng 1.2.27 - [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This program reads in a PNG image, writes it out again, and then
+ * compares the two files.  If the files are identical, this shows that
+ * the basic chunk handling, filtering, and (de)compression code is working
+ * properly.  It does not currently test all of the transforms, although
+ * it probably should.
+ *
+ * The program will report "FAIL" in certain legitimate cases:
+ * 1) when the compression level or filter selection method is changed.
+ * 2) when the maximum IDAT size (PNG_ZBUF_SIZE in pngconf.h) is not 8192.
+ * 3) unknown unsafe-to-copy ancillary chunks or unknown critical chunks
+ *    exist in the input file.
+ * 4) others not listed here...
+ * In these cases, it is best to check with another tool such as "pngcheck"
+ * to see what the differences between the two files are.
+ *
+ * If a filename is given on the command-line, then this file is used
+ * for the input, rather than the default "pngtest.png".  This allows
+ * testing a wide variety of files easily.  You can also test a number
+ * of files at once by typing "pngtest -m file1.png file2.png ..."
+ */
+
+#include "png.h"
+
+#if defined(_WIN32_WCE)  /* file access through Win32 handles */
+#  if _WIN32_WCE < 211
+     __error__ (f|w)printf functions are not supported on old WindowsCE.;
+#  endif
+#  include <windows.h>
+#  include <stdlib.h>
+#  define READFILE(file, data, length, check) \
+     if (!ReadFile(file, data, length, &check, NULL)) check = 0
+#  define WRITEFILE(file, data, length, check) \
+     if (!WriteFile(file, data, length, &check, NULL)) check = 0
+#  define FCLOSE(file) CloseHandle(file)
+#else  /* file access through stdio streams */
+#  include <stdio.h>
+#  include <stdlib.h>
+#  define READFILE(file, data, length, check) \
+     check=(png_size_t)fread(data,(png_size_t)1,length,file)
+#  define WRITEFILE(file, data, length, check) \
+     check=(png_size_t)fwrite(data,(png_size_t)1, length, file)
+#  define FCLOSE(file) fclose(file)
+#endif
+
+#if defined(PNG_NO_STDIO)
+#  if defined(_WIN32_WCE)
+     typedef HANDLE                png_FILE_p;
+#  else
+     typedef FILE                * png_FILE_p;
+#  endif
+#endif
+
+/* Makes pngtest verbose so we can find problems (needs to be before png.h) */
+#ifndef PNG_DEBUG
+#  define PNG_DEBUG 0
+#endif
+
+#if !PNG_DEBUG
+#  define SINGLE_ROWBUF_ALLOC  /* makes buffer overruns easier to nail */
+#endif
+
+/* Turn on CPU timing
+#define PNGTEST_TIMING
+*/
+
+#ifdef PNG_NO_FLOATING_POINT_SUPPORTED
+#undef PNGTEST_TIMING
+#endif
+
+#ifdef PNGTEST_TIMING
+static float t_start, t_stop, t_decode, t_encode, t_misc;
+#include <time.h>
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+#define PNG_tIME_STRING_LENGTH 30
+static int tIME_chunk_present=0;
+static char tIME_string[PNG_tIME_STRING_LENGTH] = "no tIME chunk present in file";
+#endif
+
+static int verbose = 0;
+
+int test_one_file PNGARG((PNG_CONST char *inname, PNG_CONST char *outname));
+
+#ifdef __TURBOC__
+#include <mem.h>
+#endif
+
+/* defined so I can write to a file on gui/windowing platforms */
+/*  #define STDERR stderr  */
+#define STDERR stdout   /* for DOS */
+
+/* example of using row callbacks to make a simple progress meter */
+static int status_pass=1;            /* pass last announced by read_row_callback */
+static int status_dots_requested=0;  /* 1: test_one_file installs the row callbacks */
+static int status_dots=1;            /* countdown to read_row_callback's line wrap */
+
+/* In case a system header (e.g., on AIX) defined jmpbuf */
+#ifdef jmpbuf
+#  undef jmpbuf
+#endif
+
+/* Define png_jmpbuf() in case we are using a pre-1.0.6 version of libpng */
+#ifndef png_jmpbuf
+#  define png_jmpbuf(png_ptr) png_ptr->jmpbuf
+#endif
+
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass);
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
+{
+    /* Progress meter: 'r' per row, heading per pass, wrap after 30 marks. */
+    if (png_ptr == NULL || row_number > PNG_UINT_31_MAX) return;
+    if (pass != status_pass)
+    {
+       status_pass = pass;
+       status_dots = 31;
+       fprintf(stdout,"\n Pass %d: ",pass);
+    }
+    if (--status_dots == 0)
+    {
+       status_dots = 30;
+       fprintf(stdout, "\n         ");
+    }
+    fprintf(stdout, "r");
+}
+
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass);
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
+{   /* Progress meter: print a 'w' for each row whose arguments look sane. */
+    if (png_ptr != NULL && row_number <= PNG_UINT_31_MAX && pass <= 7)
+       fprintf(stdout, "w");
+}
+
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+/* Example of using user transform callback (we don't transform anything,
+   but merely examine the row filters.  We set this to 256 rather than
+   5 in case illegal filter values are present.) */
+static png_uint_32 filters_used[256];
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data);
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data)
+{   /* Tally the byte stored just before the row data (the row's filter). */
+    if (png_ptr == NULL || row_info == NULL) return;
+    filters_used[data[-1]]++;
+}
+#endif
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+/* example of using user transform callback (we don't transform anything,
+   but merely count the zero samples) */
+
+static png_uint_32 zero_samples;
+
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data);
+void
+#ifdef PNG_1_0_X
+PNGAPI
+#endif
+count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data)
+{
+   png_bytep dp = data;
+   if(png_ptr == NULL || row_info == NULL || data == NULL)return;
+
+   /* contents of row_info:
+    *  png_uint_32 width      width of row
+    *  png_uint_32 rowbytes   number of bytes in row
+    *  png_byte color_type    color type of pixels
+    *  png_byte bit_depth     bit depth of samples
+    *  png_byte channels      number of channels (1-4)
+    *  png_byte pixel_depth   bits per pixel (depth*channels)
+    */
+
+    /* Adds to zero_samples the number of zero samples in the row (or zero
+     * pixels if color_type is 3).  Packed 2- and 4-bit samples are tested
+     * at the current 'pos' before it advances, as the 1-bit case does. */
+    if(row_info->color_type == 0 || row_info->color_type == 3)
+    {
+       int pos=0;
+       png_uint_32 n, nstop;
+       for (n=0, nstop=row_info->width; n<nstop; n++)
+       {
+          if(row_info->bit_depth == 1)
+          {
+             if(((*dp << pos++ ) & 0x80) == 0) zero_samples++;
+             if(pos == 8)
+             {
+                pos = 0;
+                dp++;
+             }
+          }
+          if(row_info->bit_depth == 2)
+          {
+             if(((*dp << pos) & 0xc0) == 0) zero_samples++;
+             if((pos += 2) == 8)
+             {
+                pos = 0;
+                dp++;
+             }
+          }
+          if(row_info->bit_depth == 4)
+          {
+             if(((*dp << pos) & 0xf0) == 0) zero_samples++;
+             if((pos += 4) == 8)
+             {
+                pos = 0;
+                dp++;
+             }
+          }
+          if(row_info->bit_depth == 8)
+             if(*dp++ == 0) zero_samples++;
+          if(row_info->bit_depth == 16)
+          {
+             if((*dp | *(dp+1)) == 0) zero_samples++;
+             dp+=2;
+          }
+       }
+    }
+    else /* other color types */
+    {
+       png_uint_32 n, nstop;
+       int channel;
+       int color_channels = row_info->channels;
+       if(row_info->color_type > 3)color_channels--;
+
+       for (n=0, nstop=row_info->width; n<nstop; n++)
+       {
+          for (channel = 0; channel < color_channels; channel++)
+          {
+             if(row_info->bit_depth == 8)
+                if(*dp++ == 0) zero_samples++;
+             if(row_info->bit_depth == 16)
+             {
+                if((*dp | *(dp+1)) == 0) zero_samples++;
+                dp+=2;
+             }
+          }
+          if(row_info->color_type > 3)
+          {
+             dp++;
+             if(row_info->bit_depth == 16)dp++;
+          }
+       }
+    }
+}
+#endif /* PNG_WRITE_USER_TRANSFORM_SUPPORTED */
+
+static int wrote_question = 0;
+
+#if defined(PNG_NO_STDIO)
+/* START of code to validate stdio-free compilation */
+/* These copies of the default read/write functions come from pngrio.c and */
+/* pngwio.c.  They allow "don't include stdio" testing of the library. */
+/* This is the function that does the actual reading of data.  If you are
+   not reading from a standard C stream, you should create a replacement
+   read_data function and use it at run time with png_set_read_fn(), rather
+   than changing the library. */
+
+#ifndef USE_FAR_KEYWORD
+static void
+pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   /* Fill 'data' with 'length' bytes taken from png_ptr->io_ptr via
+    * READFILE; anything other than a full read is reported through
+    * png_error().
+    */
+   png_FILE_p source = (png_FILE_p)png_ptr->io_ptr;
+   png_size_t got;
+
+   READFILE(source, data, length, got);
+
+   if (got != length)
+      png_error(png_ptr, "Read Error!");
+}
+#else
+/* this is the model-independent version. Since the standard I/O library
+   can't handle far buffers in the medium and small models, we have to copy
+   the data.
+*/
+
+#define NEAR_BUF_SIZE 1024
+#define MIN(a,b) (a <= b ? a : b)
+
+static void
+pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_size_t check;
+   png_byte *n_data;
+   png_FILE_p io_ptr;
+   /* Read 'length' bytes into 'data'; any shortfall ends in png_error(). */
+   /* Check if data really is near. If so, use usual code. */
+   n_data = (png_byte *)CVT_PTR_NOCHECK(data);
+   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)n_data == data)
+   {
+      READFILE(io_ptr, n_data, length, check);
+   }
+   else
+   {
+      png_byte buf[NEAR_BUF_SIZE];
+      png_size_t read, remaining, err;
+      check = 0;
+      remaining = length;
+      do
+      {
+         read = MIN(NEAR_BUF_SIZE, remaining);
+         READFILE(io_ptr, buf, read, err);  /* request the whole piece */
+         png_memcpy(data, buf, read); /* copy near buffer to far buffer */
+         if(err != read)
+            break;
+         else
+            check += err;
+         data += read;
+         remaining -= read;
+      }
+      while (remaining != 0);
+   }
+   if (check != length)
+   {
+      png_error(png_ptr, "read Error");
+   }
+}
+#endif /* USE_FAR_KEYWORD */
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+static void
+pngtest_flush(png_structp png_ptr)
+{
+   /* Flush the stdio stream held in png_ptr->io_ptr; no-op on Windows CE. */
+#if !defined(_WIN32_WCE)
+   png_FILE_p stream = (png_FILE_p)CVT_PTR((png_ptr->io_ptr));
+   if (stream == NULL) return;
+   fflush(stream);
+#endif
+}
+#endif
+
+/* This is the function that does the actual writing of data.  If you are
+   not writing to a standard C stream, you should create a replacement
+   write_data function and use it at run time with png_set_write_fn(), rather
+   than changing the library. */
+#ifndef USE_FAR_KEYWORD
+static void
+pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   /* Write 'length' bytes of 'data' to png_ptr->io_ptr; png_error() if short. */
+   png_FILE_p out = (png_FILE_p)png_ptr->io_ptr;
+   png_uint_32 written;
+
+   WRITEFILE(out, data, length, written);
+   if (written != length)
+      png_error(png_ptr, "Write Error");
+}
+#else
+/* this is the model-independent version. Since the standard I/O library
+   can't handle far buffers in the medium and small models, we have to copy
+   the data.
+*/
+
+#define NEAR_BUF_SIZE 1024
+#define MIN(a,b) (a <= b ? a : b)
+
+static void
+pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_uint_32 check;
+   png_byte *near_data;  /* Needs to be "png_byte *" instead of "png_bytep" */
+   png_FILE_p io_ptr;
+   /* Write 'length' bytes from 'data'; a short write ends in png_error(). */
+   /* Check if data really is near. If so, use usual code. */
+   near_data = (png_byte *)CVT_PTR_NOCHECK(data);
+   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)near_data == data)
+   {
+      WRITEFILE(io_ptr, near_data, length, check);
+   }
+   else
+   {
+      png_byte buf[NEAR_BUF_SIZE];
+      png_size_t written, remaining, err;
+      check = 0;
+      remaining = length;
+      do
+      {
+         written = MIN(NEAR_BUF_SIZE, remaining);  /* size of this piece */
+         png_memcpy(buf, data, written); /* copy far buffer to near buffer */
+         WRITEFILE(io_ptr, buf, written, err);
+         if (err != written)
+            break;  /* short write: 'check' stays below 'length' */
+         else
+            check += err;
+         data += written;
+         remaining -= written;
+      }
+      while (remaining != 0);
+   }
+   if (check != length)
+   {
+      png_error(png_ptr, "Write Error");
+   }
+}
+#endif /* USE_FAR_KEYWORD */
+#endif /* PNG_NO_STDIO */
+/* END of code to validate stdio-free compilation */
+
+/* This function is called when there is a warning, but the library thinks
+ * it can continue anyway.  Replacement functions don't have to do anything
+ * here if you don't want to.  In the default configuration, png_ptr is
+ * not used, but it is passed in case it may be useful.
+ */
+static void
+pngtest_warning(png_structp png_ptr, png_const_charp message)
+{
+   /* Prefix the message with error_ptr when one is available. */
+   PNG_CONST char *label = (png_ptr == NULL || png_ptr->error_ptr == NULL) ?
+      "UNKNOWN (ERROR!)" : (PNG_CONST char *)png_ptr->error_ptr;
+   fprintf(STDERR, "%s: libpng warning: %s\n", label, message);
+}
+
+/* This is the default error handling function.  Note that replacements for
+ * this function MUST NOT RETURN, or the program will likely crash.  This
+ * function is used by default, or if the program supplies NULL for the
+ * error function pointer in png_set_error_fn().
+ */
+static void
+pngtest_error(png_structp png_ptr, png_const_charp message)
+{
+   pngtest_warning(png_ptr, message);  /* reported in the warning format */
+   /* We can return because png_error calls the default handler, which is
+    * actually OK in this case. */
+}
+
+/* START of code to validate memory allocation and deallocation */
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+
+/* Allocate memory.  For reasonable files, size should never exceed
+   64K.  However, zlib may allocate more than 64K if you don't tell
+   it not to.  See zconf.h and png.h for more information.  zlib does
+   need to allocate exactly 64K, so whatever you call here must
+   have the ability to do that.
+
+   This piece of code can be compiled to validate max 64K allocations
+   by setting MAXSEG_64K in zlib zconf.h *or* PNG_MAX_MALLOC_64K. */
+typedef struct memory_information
+{
+   png_uint_32               size;      /* bytes requested for this block */
+   png_voidp                 pointer;   /* address handed to the caller */
+   struct memory_information FAR *next; /* next record in the list */
+} memory_information;
+typedef memory_information FAR *memory_infop;
+
+static memory_infop pinformation = NULL;  /* head of the list of live blocks */
+static int current_allocation = 0;        /* bytes currently outstanding */
+static int maximum_allocation = 0;        /* highest current_allocation seen */
+static int total_allocation = 0;          /* running total of bytes requested */
+static int num_allocations = 0;           /* count of non-empty requests */
+
+png_voidp png_debug_malloc PNGARG((png_structp png_ptr, png_uint_32 size));
+void png_debug_free PNGARG((png_structp png_ptr, png_voidp ptr));
+
+png_voidp
+png_debug_malloc(png_structp png_ptr, png_uint_32 size)
+{
+   /* Tracking allocator: the block is recorded in the pinformation list. */
+   /* png_malloc has already tested for NULL; png_create_struct calls
+      png_debug_malloc directly, with png_ptr == NULL which is OK */
+
+   if (size == 0)
+      return (NULL);
+
+   /* This calls the library allocator twice, once to get the requested
+      buffer and once to get a new free list entry. */
+   {
+      /* Disable malloc_fn and free_fn */
+      memory_infop pinfo;
+      png_set_mem_fn(png_ptr, NULL, NULL, NULL);
+      pinfo = (memory_infop)png_malloc(png_ptr,
+         (png_uint_32)png_sizeof (*pinfo));
+      pinfo->size = size;
+      current_allocation += size;
+      total_allocation += size;
+      num_allocations ++;
+      if (current_allocation > maximum_allocation)
+         maximum_allocation = current_allocation;
+      pinfo->pointer = (png_voidp)png_malloc(png_ptr, size);
+      /* Restore malloc_fn and free_fn */
+      png_set_mem_fn(png_ptr, png_voidp_NULL, (png_malloc_ptr)png_debug_malloc,
+         (png_free_ptr)png_debug_free);
+      if (size != 0 && pinfo->pointer == NULL)
+      {
+         current_allocation -= size;
+         total_allocation -= size;
+         png_free_default(png_ptr, pinfo);  /* don't leak the list entry */
+         png_error(png_ptr, "out of memory in pngtest->png_debug_malloc.");
+      }
+      pinfo->next = pinformation;
+      pinformation = pinfo;
+      /* Make sure the caller isn't assuming zeroed memory. */
+      png_memset(pinfo->pointer, 0xdd, pinfo->size);
+      if(verbose)
+         printf("png_malloc %lu bytes at %p\n",(unsigned long)size,
+          (void *)pinfo->pointer);
+      return (png_voidp)(pinfo->pointer);
+   }
+}
+
+/* Free a pointer.  It is removed from the list at the same time. */
+void
+png_debug_free(png_structp png_ptr, png_voidp ptr)
+{
+   /* Unlinks the record for 'ptr' from pinformation, then frees the data. */
+   if (png_ptr == NULL)
+      fprintf(STDERR, "NULL pointer to png_debug_free.\n");
+   if (ptr == 0)
+   {
+#if 0 /* This happens all the time. */
+      fprintf(STDERR, "WARNING: freeing NULL pointer\n");
+#endif
+      return;
+   }
+
+   /* Unlink the element from the list. */
+   {
+      memory_infop FAR *ppinfo = &pinformation;
+      for (;;)
+      {
+         memory_infop pinfo = *ppinfo;
+         /* End of list (or empty list): ptr was never recorded. */
+         if (pinfo == NULL)
+         {
+            fprintf(STDERR, "Pointer %p not found\n", (void *)ptr);
+            break;
+         }
+         if (pinfo->pointer == ptr)
+         {
+            *ppinfo = pinfo->next;
+            current_allocation -= pinfo->size;
+            if (current_allocation < 0)
+               fprintf(STDERR, "Duplicate free of memory\n");
+            /* We must free the list element too, but first kill
+               the memory that is to be freed. */
+            png_memset(ptr, 0x55, pinfo->size);
+            png_free_default(png_ptr, pinfo);
+            break;
+         }
+         ppinfo = &pinfo->next;
+      }
+   }
+
+   /* Finally free the data. */
+   if(verbose)
+      printf("Freeing %p\n",(void *)ptr);
+   png_free_default(png_ptr, ptr);
+}
+#endif /* PNG_USER_MEM_SUPPORTED && PNG_DEBUG */
+/* END of code to test memory allocation/deallocation */
+
+/* Test one file */
+int
+test_one_file(PNG_CONST char *inname, PNG_CONST char *outname)
+{
+   static png_FILE_p fpin;
+   static png_FILE_p fpout;  /* "static" prevents setjmp corruption */
+   png_structp read_ptr;
+   png_infop read_info_ptr, end_info_ptr;
+#ifdef PNG_WRITE_SUPPORTED
+   png_structp write_ptr;
+   png_infop write_info_ptr;
+   png_infop write_end_info_ptr;
+#else
+   png_structp write_ptr = NULL;
+   png_infop write_info_ptr = NULL;
+   png_infop write_end_info_ptr = NULL;
+#endif
+   png_bytep row_buf;
+   png_uint_32 y;
+   png_uint_32 width, height;
+   int num_pass, pass;
+   int bit_depth, color_type;
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   jmp_buf jmpbuf;
+#endif
+#endif
+
+#if defined(_WIN32_WCE)
+   TCHAR path[MAX_PATH];
+#endif
+   char inbuf[256], outbuf[256];
+
+   row_buf = NULL;
+
+#if defined(_WIN32_WCE)
+   MultiByteToWideChar(CP_ACP, 0, inname, -1, path, MAX_PATH);
+   if ((fpin = CreateFile(path, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0, NULL)) == INVALID_HANDLE_VALUE)
+#else
+   if ((fpin = fopen(inname, "rb")) == NULL)
+#endif
+   {
+      fprintf(STDERR, "Could not find input file %s\n", inname);
+      return (1);
+   }
+
+#if defined(_WIN32_WCE)
+   MultiByteToWideChar(CP_ACP, 0, outname, -1, path, MAX_PATH);
+   if ((fpout = CreateFile(path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL)) == INVALID_HANDLE_VALUE)
+#else
+   if ((fpout = fopen(outname, "wb")) == NULL)
+#endif
+   {
+      fprintf(STDERR, "Could not open output file %s\n", outname);
+      FCLOSE(fpin);
+      return (1);
+   }
+
+   png_debug(0, "Allocating read and write structures\n");
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+   read_ptr = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
+      png_error_ptr_NULL, png_error_ptr_NULL, png_voidp_NULL,
+      (png_malloc_ptr)png_debug_malloc, (png_free_ptr)png_debug_free);
+#else
+   read_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
+      png_error_ptr_NULL, png_error_ptr_NULL);
+#endif
+   png_set_error_fn(read_ptr, (png_voidp)inname, pngtest_error,
+       pngtest_warning);
+#ifdef PNG_WRITE_SUPPORTED
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+   write_ptr = png_create_write_struct_2(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
+      png_error_ptr_NULL, png_error_ptr_NULL, png_voidp_NULL,
+      (png_malloc_ptr)png_debug_malloc, (png_free_ptr)png_debug_free);
+#else
+   write_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
+      png_error_ptr_NULL, png_error_ptr_NULL);
+#endif
+   png_set_error_fn(write_ptr, (png_voidp)inname, pngtest_error,
+       pngtest_warning);
+#endif
+   png_debug(0, "Allocating read_info, write_info and end_info structures\n");
+   read_info_ptr = png_create_info_struct(read_ptr);
+   end_info_ptr = png_create_info_struct(read_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+   write_info_ptr = png_create_info_struct(write_ptr);
+   write_end_info_ptr = png_create_info_struct(write_ptr);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   png_debug(0, "Setting jmpbuf for read struct\n");
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_jmpbuf(read_ptr)))
+#endif
+   {
+      fprintf(STDERR, "%s -> %s: libpng read error\n", inname, outname);
+      png_free(read_ptr, row_buf);
+      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+      png_destroy_write_struct(&write_ptr, &write_info_ptr);
+#endif
+      FCLOSE(fpin);
+      FCLOSE(fpout);
+      return (1);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_jmpbuf(read_ptr),jmpbuf,png_sizeof(jmp_buf));
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+   png_debug(0, "Setting jmpbuf for write struct\n");
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_jmpbuf(write_ptr)))
+#endif
+   {
+      fprintf(STDERR, "%s -> %s: libpng write error\n", inname, outname);
+      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+      png_destroy_write_struct(&write_ptr, &write_info_ptr);
+#endif
+      FCLOSE(fpin);
+      FCLOSE(fpout);
+      return (1);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_jmpbuf(write_ptr),jmpbuf,png_sizeof(jmp_buf));
+#endif
+#endif
+#endif
+
+   png_debug(0, "Initializing input and output streams\n");
+#if !defined(PNG_NO_STDIO)
+   png_init_io(read_ptr, fpin);
+#  ifdef PNG_WRITE_SUPPORTED
+   png_init_io(write_ptr, fpout);
+#  endif
+#else
+   png_set_read_fn(read_ptr, (png_voidp)fpin, pngtest_read_data);
+#  ifdef PNG_WRITE_SUPPORTED
+   png_set_write_fn(write_ptr, (png_voidp)fpout,  pngtest_write_data,
+#    if defined(PNG_WRITE_FLUSH_SUPPORTED)
+      pngtest_flush);
+#    else
+      NULL);
+#    endif
+#  endif
+#endif
+   if(status_dots_requested == 1)
+   {
+#ifdef PNG_WRITE_SUPPORTED
+      png_set_write_status_fn(write_ptr, write_row_callback);
+#endif
+      png_set_read_status_fn(read_ptr, read_row_callback);
+   }
+   else
+   {
+#ifdef PNG_WRITE_SUPPORTED
+      png_set_write_status_fn(write_ptr, png_write_status_ptr_NULL);
+#endif
+      png_set_read_status_fn(read_ptr, png_read_status_ptr_NULL);
+   }
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   {
+     int i;
+     for(i=0; i<256; i++)
+        filters_used[i]=0;
+     png_set_read_user_transform_fn(read_ptr, count_filters);
+   }
+#endif
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   zero_samples=0;
+   png_set_write_user_transform_fn(write_ptr, count_zero_samples);
+#endif
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+#  ifndef PNG_HANDLE_CHUNK_ALWAYS
+#    define PNG_HANDLE_CHUNK_ALWAYS       3
+#  endif
+   png_set_keep_unknown_chunks(read_ptr, PNG_HANDLE_CHUNK_ALWAYS,
+      png_bytep_NULL, 0);
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+#  ifndef PNG_HANDLE_CHUNK_IF_SAFE
+#    define PNG_HANDLE_CHUNK_IF_SAFE      2
+#  endif
+   png_set_keep_unknown_chunks(write_ptr, PNG_HANDLE_CHUNK_IF_SAFE,
+      png_bytep_NULL, 0);
+#endif
+
+   png_debug(0, "Reading info struct\n");
+   png_read_info(read_ptr, read_info_ptr);
+
+   png_debug(0, "Transferring info struct\n");
+   {
+      int interlace_type, compression_type, filter_type;
+
+      if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth,
+          &color_type, &interlace_type, &compression_type, &filter_type))
+      {
+         png_set_IHDR(write_ptr, write_info_ptr, width, height, bit_depth,
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+            color_type, interlace_type, compression_type, filter_type);
+#else
+            color_type, PNG_INTERLACE_NONE, compression_type, filter_type);
+#endif
+      }
+   }
+#if defined(PNG_FIXED_POINT_SUPPORTED)
+#if defined(PNG_cHRM_SUPPORTED)
+   {
+      png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
+         blue_y;
+      if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y, &red_x,
+         &red_y, &green_x, &green_y, &blue_x, &blue_y))
+      {
+         png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y, red_x,
+            red_y, green_x, green_y, blue_x, blue_y);
+      }
+   }
+#endif
+#if defined(PNG_gAMA_SUPPORTED)
+   {
+      png_fixed_point gamma;
+
+      if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &gamma))
+      {
+         png_set_gAMA_fixed(write_ptr, write_info_ptr, gamma);
+      }
+   }
+#endif
+#else /* Use floating point versions */
+#if defined(PNG_FLOATING_POINT_SUPPORTED)
+#if defined(PNG_cHRM_SUPPORTED)
+   {
+      double white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
+         blue_y;
+      if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x,
+         &red_y, &green_x, &green_y, &blue_x, &blue_y))
+      {
+         png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x,
+            red_y, green_x, green_y, blue_x, blue_y);
+      }
+   }
+#endif
+#if defined(PNG_gAMA_SUPPORTED)
+   {
+      double gamma;
+
+      if (png_get_gAMA(read_ptr, read_info_ptr, &gamma))
+      {
+         png_set_gAMA(write_ptr, write_info_ptr, gamma);
+      }
+   }
+#endif
+#endif /* floating point */
+#endif /* fixed point */
+#if defined(PNG_iCCP_SUPPORTED)
+   {
+      png_charp name;
+      png_charp profile;
+      png_uint_32 proflen;
+      int compression_type;
+
+      if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type,
+                      &profile, &proflen))
+      {
+         png_set_iCCP(write_ptr, write_info_ptr, name, compression_type,
+                      profile, proflen);
+      }
+   }
+#endif
+#if defined(PNG_sRGB_SUPPORTED)
+   {
+      int intent;
+
+      if (png_get_sRGB(read_ptr, read_info_ptr, &intent))
+      {
+         png_set_sRGB(write_ptr, write_info_ptr, intent);
+      }
+   }
+#endif
+   {
+      png_colorp palette;
+      int num_palette;
+
+      if (png_get_PLTE(read_ptr, read_info_ptr, &palette, &num_palette))
+      {
+         png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette);
+      }
+   }
+#if defined(PNG_bKGD_SUPPORTED)
+   {
+      png_color_16p background;
+
+      if (png_get_bKGD(read_ptr, read_info_ptr, &background))
+      {
+         png_set_bKGD(write_ptr, write_info_ptr, background);
+      }
+   }
+#endif
+#if defined(PNG_hIST_SUPPORTED)
+   {
+      png_uint_16p hist;
+
+      if (png_get_hIST(read_ptr, read_info_ptr, &hist))
+      {
+         png_set_hIST(write_ptr, write_info_ptr, hist);
+      }
+   }
+#endif
+#if defined(PNG_oFFs_SUPPORTED)
+   {
+      png_int_32 offset_x, offset_y;
+      int unit_type;
+
+      if (png_get_oFFs(read_ptr, read_info_ptr,&offset_x,&offset_y,&unit_type))
+      {
+         png_set_oFFs(write_ptr, write_info_ptr, offset_x, offset_y, unit_type);
+      }
+   }
+#endif
+#if defined(PNG_pCAL_SUPPORTED)
+   {
+      png_charp purpose, units;
+      png_charpp params;
+      png_int_32 X0, X1;
+      int type, nparams;
+
+      if (png_get_pCAL(read_ptr, read_info_ptr, &purpose, &X0, &X1, &type,
+         &nparams, &units, &params))
+      {
+         png_set_pCAL(write_ptr, write_info_ptr, purpose, X0, X1, type,
+            nparams, units, params);
+      }
+   }
+#endif
+#if defined(PNG_pHYs_SUPPORTED)
+   {
+      png_uint_32 res_x, res_y;
+      int unit_type;
+
+      if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y, &unit_type))
+      {
+         png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type);
+      }
+   }
+#endif
+#if defined(PNG_sBIT_SUPPORTED)
+   {
+      png_color_8p sig_bit;
+
+      if (png_get_sBIT(read_ptr, read_info_ptr, &sig_bit))
+      {
+         png_set_sBIT(write_ptr, write_info_ptr, sig_bit);
+      }
+   }
+#endif
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   {
+      int unit;
+      double scal_width, scal_height;
+
+      if (png_get_sCAL(read_ptr, read_info_ptr, &unit, &scal_width,
+         &scal_height))
+      {
+         png_set_sCAL(write_ptr, write_info_ptr, unit, scal_width, scal_height);
+      }
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   {
+      int unit;
+      png_charp scal_width, scal_height;
+
+      if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &scal_width,
+          &scal_height))
+      {
+         png_set_sCAL_s(write_ptr, write_info_ptr, unit, scal_width, scal_height);
+      }
+   }
+#endif
+#endif
+#endif
+#if defined(PNG_TEXT_SUPPORTED)
+   {
+      png_textp text_ptr;
+      int num_text;
+
+      if (png_get_text(read_ptr, read_info_ptr, &text_ptr, &num_text) > 0)
+      {
+         png_debug1(0, "Handling %d iTXt/tEXt/zTXt chunks\n", num_text);
+         png_set_text(write_ptr, write_info_ptr, text_ptr, num_text);
+      }
+   }
+#endif
+#if defined(PNG_tIME_SUPPORTED)
+   {
+      png_timep mod_time;
+
+      if (png_get_tIME(read_ptr, read_info_ptr, &mod_time))
+      {
+         png_set_tIME(write_ptr, write_info_ptr, mod_time);
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+         /* we have to use png_memcpy instead of "=" because the string
+            pointed to by png_convert_to_rfc1123() gets free'ed before
+            we use it */
+         png_memcpy(tIME_string,
+                    png_convert_to_rfc1123(read_ptr, mod_time), 
+                    png_sizeof(tIME_string));
+         tIME_string[png_sizeof(tIME_string)-1] = '\0';
+         tIME_chunk_present++;
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+      }
+   }
+#endif
+#if defined(PNG_tRNS_SUPPORTED)
+   {
+      png_bytep trans;
+      int num_trans;
+      png_color_16p trans_values;
+
+      if (png_get_tRNS(read_ptr, read_info_ptr, &trans, &num_trans,
+         &trans_values))
+      {
+         int sample_max = (1 << read_info_ptr->bit_depth);
+         /* libpng doesn't reject a tRNS chunk with out-of-range samples */
+         if (!((read_info_ptr->color_type == PNG_COLOR_TYPE_GRAY &&
+            (int)trans_values->gray > sample_max) ||
+            (read_info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
+            ((int)trans_values->red > sample_max ||
+            (int)trans_values->green > sample_max ||
+            (int)trans_values->blue > sample_max))))
+           png_set_tRNS(write_ptr, write_info_ptr, trans, num_trans,
+              trans_values);
+      }
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   {
+      png_unknown_chunkp unknowns;
+      int num_unknowns = (int)png_get_unknown_chunks(read_ptr, read_info_ptr,
+         &unknowns);
+      if (num_unknowns)
+      {
+         png_size_t i;
+         png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns,
+           num_unknowns);
+         /* copy the locations from the read_info_ptr.  The automatically
+            generated locations in write_info_ptr are wrong because we
+            haven't written anything yet */
+         for (i = 0; i < (png_size_t)num_unknowns; i++)
+           png_set_unknown_chunk_location(write_ptr, write_info_ptr, i,
+             unknowns[i].location);
+      }
+   }
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+   png_debug(0, "\nWriting info struct\n");
+
+/* If we wanted, we could write info in two steps:
+   png_write_info_before_PLTE(write_ptr, write_info_ptr);
+ */
+   png_write_info(write_ptr, write_info_ptr);
+#endif
+
+#ifdef SINGLE_ROWBUF_ALLOC
+   png_debug(0, "\nAllocating row buffer...");
+   row_buf = (png_bytep)png_malloc(read_ptr,
+      png_get_rowbytes(read_ptr, read_info_ptr));
+   png_debug1(0, "0x%08lx\n\n", (unsigned long)row_buf);
+#endif /* SINGLE_ROWBUF_ALLOC */
+   png_debug(0, "Writing row data\n");
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+  defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   num_pass = png_set_interlace_handling(read_ptr);
+#  ifdef PNG_WRITE_SUPPORTED
+   png_set_interlace_handling(write_ptr);
+#  endif
+#else
+   num_pass=1;
+#endif
+
+#ifdef PNGTEST_TIMING
+   t_stop = (float)clock();
+   t_misc += (t_stop - t_start);
+   t_start = t_stop;
+#endif
+   for (pass = 0; pass < num_pass; pass++)
+   {
+      png_debug1(0, "Writing row data for pass %d\n",pass);
+      for (y = 0; y < height; y++)
+      {
+#ifndef SINGLE_ROWBUF_ALLOC
+         png_debug2(0, "\nAllocating row buffer (pass %d, y = %ld)...", pass,y);
+         row_buf = (png_bytep)png_malloc(read_ptr,
+            png_get_rowbytes(read_ptr, read_info_ptr));
+         png_debug2(0, "0x%08lx (%ld bytes)\n", (unsigned long)row_buf,
+            png_get_rowbytes(read_ptr, read_info_ptr));
+#endif /* !SINGLE_ROWBUF_ALLOC */
+         png_read_rows(read_ptr, (png_bytepp)&row_buf, png_bytepp_NULL, 1);
+
+#ifdef PNG_WRITE_SUPPORTED
+#ifdef PNGTEST_TIMING
+         t_stop = (float)clock();
+         t_decode += (t_stop - t_start);
+         t_start = t_stop;
+#endif
+         png_write_rows(write_ptr, (png_bytepp)&row_buf, 1);
+#ifdef PNGTEST_TIMING
+         t_stop = (float)clock();
+         t_encode += (t_stop - t_start);
+         t_start = t_stop;
+#endif
+#endif /* PNG_WRITE_SUPPORTED */
+
+#ifndef SINGLE_ROWBUF_ALLOC
+         png_debug2(0, "Freeing row buffer (pass %d, y = %ld)\n\n", pass, y);
+         png_free(read_ptr, row_buf);
+#endif /* !SINGLE_ROWBUF_ALLOC */
+      }
+   }
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free_data(read_ptr, read_info_ptr, PNG_FREE_UNKN, -1);
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free_data(write_ptr, write_info_ptr, PNG_FREE_UNKN, -1);
+#endif
+
+   png_debug(0, "Reading and writing end_info data\n");
+
+   png_read_end(read_ptr, end_info_ptr);
+#if defined(PNG_TEXT_SUPPORTED)
+   {
+      png_textp text_ptr;
+      int num_text;
+
+      if (png_get_text(read_ptr, end_info_ptr, &text_ptr, &num_text) > 0)
+      {
+         png_debug1(0, "Handling %d iTXt/tEXt/zTXt chunks\n", num_text);
+         png_set_text(write_ptr, write_end_info_ptr, text_ptr, num_text);
+      }
+   }
+#endif
+#if defined(PNG_tIME_SUPPORTED)
+   {
+      png_timep mod_time;
+
+      if (png_get_tIME(read_ptr, end_info_ptr, &mod_time))
+      {
+         png_set_tIME(write_ptr, write_end_info_ptr, mod_time);
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+         /* we have to use png_memcpy instead of "=" because the string
+            pointed to by png_convert_to_rfc1123() gets free'ed before
+            we use it */
+         png_memcpy(tIME_string,
+                    png_convert_to_rfc1123(read_ptr, mod_time),
+                    png_sizeof(tIME_string));
+         tIME_string[png_sizeof(tIME_string)-1] = '\0';
+         tIME_chunk_present++;
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+      }
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   {
+      png_unknown_chunkp unknowns;
+      int num_unknowns;
+      num_unknowns = (int)png_get_unknown_chunks(read_ptr, end_info_ptr,
+         &unknowns);
+      if (num_unknowns)
+      {
+         png_size_t i;
+         png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns,
+           num_unknowns);
+         /* copy the locations from the read_info_ptr.  The automatically
+            generated locations in write_end_info_ptr are wrong because we
+            haven't written the end_info yet */
+         for (i = 0; i < (png_size_t)num_unknowns; i++)
+           png_set_unknown_chunk_location(write_ptr, write_end_info_ptr, i,
+             unknowns[i].location);
+      }
+   }
+#endif
+#ifdef PNG_WRITE_SUPPORTED
+   png_write_end(write_ptr, write_end_info_ptr);
+#endif
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+   if(verbose)
+   {
+      png_uint_32 iwidth, iheight;
+      iwidth = png_get_image_width(write_ptr, write_info_ptr);
+      iheight = png_get_image_height(write_ptr, write_info_ptr);
+      fprintf(STDERR, "Image width = %lu, height = %lu\n",
+         (unsigned long)iwidth, (unsigned long)iheight);
+   }
+#endif
+
+   png_debug(0, "Destroying data structs\n");
+#ifdef SINGLE_ROWBUF_ALLOC
+   png_debug(1, "destroying row_buf for read_ptr\n");
+   png_free(read_ptr, row_buf);
+   row_buf=NULL;
+#endif /* SINGLE_ROWBUF_ALLOC */
+   png_debug(1, "destroying read_ptr, read_info_ptr, end_info_ptr\n");
+   png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+   png_debug(1, "destroying write_end_info_ptr\n");
+   png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+   png_debug(1, "destroying write_ptr, write_info_ptr\n");
+   png_destroy_write_struct(&write_ptr, &write_info_ptr);
+#endif
+   png_debug(0, "Destruction complete.\n");
+
+   FCLOSE(fpin);
+   FCLOSE(fpout);
+
+   png_debug(0, "Opening files for comparison\n");
+#if defined(_WIN32_WCE)
+   MultiByteToWideChar(CP_ACP, 0, inname, -1, path, MAX_PATH);
+   if ((fpin = CreateFile(path, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0, NULL)) == INVALID_HANDLE_VALUE)
+#else
+   if ((fpin = fopen(inname, "rb")) == NULL)
+#endif
+   {
+      fprintf(STDERR, "Could not find file %s\n", inname);
+      return (1);
+   }
+
+#if defined(_WIN32_WCE)
+   MultiByteToWideChar(CP_ACP, 0, outname, -1, path, MAX_PATH);
+   if ((fpout = CreateFile(path, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0, NULL)) == INVALID_HANDLE_VALUE)
+#else
+   if ((fpout = fopen(outname, "rb")) == NULL)
+#endif
+   {
+      fprintf(STDERR, "Could not find file %s\n", outname);
+      FCLOSE(fpin);
+      return (1);
+   }
+
+   for(;;)
+   {
+      png_size_t num_in, num_out;
+
+      READFILE(fpin, inbuf, 1, num_in);
+      READFILE(fpout, outbuf, 1, num_out);
+
+      if (num_in != num_out)
+      {
+         fprintf(STDERR, "\nFiles %s and %s are of a different size\n",
+                 inname, outname);
+         if(wrote_question == 0)
+         {
+            fprintf(STDERR,
+         "   Was %s written with the same maximum IDAT chunk size (%d bytes),",
+              inname,PNG_ZBUF_SIZE);
+            fprintf(STDERR,
+              "\n   filtering heuristic (libpng default), compression");
+            fprintf(STDERR,
+              " level (zlib default),\n   and zlib version (%s)?\n\n",
+              ZLIB_VERSION);
+            wrote_question=1;
+         }
+         FCLOSE(fpin);
+         FCLOSE(fpout);
+         return (0);
+      }
+
+      if (!num_in)
+         break;
+
+      if (png_memcmp(inbuf, outbuf, num_in))
+      {
+         fprintf(STDERR, "\nFiles %s and %s are different\n", inname, outname);
+         if(wrote_question == 0)
+         {
+            fprintf(STDERR,
+         "   Was %s written with the same maximum IDAT chunk size (%d bytes),",
+                 inname,PNG_ZBUF_SIZE);
+            fprintf(STDERR,
+              "\n   filtering heuristic (libpng default), compression");
+            fprintf(STDERR,
+              " level (zlib default),\n   and zlib version (%s)?\n\n",
+              ZLIB_VERSION);
+            wrote_question=1;
+         }
+         FCLOSE(fpin);
+         FCLOSE(fpout);
+         return (0);
+      }
+   }
+
+   FCLOSE(fpin);
+   FCLOSE(fpout);
+
+   return (0);
+}
+
+/* input and output filenames: defaults that main() may replace from argv; with -m, outname is the temporary output file */
+#ifdef RISCOS
+static PNG_CONST char *inname = "pngtest/png";
+static PNG_CONST char *outname = "pngout/png";
+#else
+static PNG_CONST char *inname = "pngtest.png";
+static PNG_CONST char *outname = "pngout.png";
+#endif
+/* pngtest driver: parses -m / -v / -mv / -vm and file names, runs test_one_file(), and returns 1 if anything failed, else 0. */
+int
+main(int argc, char *argv[])
+{
+   int multiple = 0;   /* set by -m, -mv or -vm: each later argument is an input file */
+   int ierror = 0;     /* failure count; any nonzero value makes main() return 1 */
+
+   fprintf(STDERR, "Testing libpng version %s\n", PNG_LIBPNG_VER_STRING);
+   fprintf(STDERR, "   with zlib   version %s\n", ZLIB_VERSION);
+   fprintf(STDERR,"%s",png_get_copyright(NULL));
+   /* Show the version of libpng used in building the library */
+   fprintf(STDERR," library (%lu):%s",
+      (unsigned long)png_access_version_number(),
+      png_get_header_version(NULL));
+   /* Show the version of libpng used in building the application */
+   fprintf(STDERR," pngtest (%lu):%s", (unsigned long)PNG_LIBPNG_VER,
+      PNG_HEADER_VERSION_STRING);
+   fprintf(STDERR," png_sizeof(png_struct)=%ld, png_sizeof(png_info)=%ld\n",
+                    (long)png_sizeof(png_struct), (long)png_sizeof(png_info));
+
+   /* Do some consistency checking on the memory allocation settings, I'm
+      not sure this matters, but it is nice to know, the first of these
+      tests should be impossible because of the way the macros are set
+      in pngconf.h */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+      fprintf(STDERR, " NOTE: Zlib compiled for max 64k, libpng not\n");
+#endif
+   /* I think the following can happen. */
+#if !defined(MAXSEG_64K) && defined(PNG_MAX_MALLOC_64K)
+      fprintf(STDERR, " NOTE: libpng compiled for max 64k, zlib not\n");
+#endif
+
+   if (strcmp(png_libpng_ver, PNG_LIBPNG_VER_STRING))
+   {
+      fprintf(STDERR,
+         "Warning: versions are different between png.h and png.c\n");
+      fprintf(STDERR, "  png.h version: %s\n", PNG_LIBPNG_VER_STRING);
+      fprintf(STDERR, "  png.c version: %s\n\n", png_libpng_ver);
+      ++ierror;
+   }
+
+   if (argc > 1)
+   {
+      if (strcmp(argv[1], "-m") == 0)
+      {
+         multiple = 1;
+         status_dots_requested = 0;
+      }
+      else if (strcmp(argv[1], "-mv") == 0 ||
+               strcmp(argv[1], "-vm") == 0 )
+      {
+         multiple = 1;
+         verbose = 1;
+         status_dots_requested = 1;
+      }
+      else if (strcmp(argv[1], "-v") == 0)
+      {
+         verbose = 1;
+         status_dots_requested = 1;
+         if (argc > 2) inname = argv[2]; /* bare "-v": argv[2] is NULL, keep the default */
+      }
+      else
+      {
+         inname = argv[1];
+         status_dots_requested = 0;
+      }
+   }
+
+   if (!multiple && argc == 3+verbose)   /* an output file name was also given */
+     outname = argv[2+verbose];
+
+   if ((!multiple && argc > 3+verbose) || (multiple && argc < 2))
+   {
+     fprintf(STDERR,
+       "usage: %s [infile.png] [outfile.png]\n\t%s -m {infile.png}\n",
+        argv[0], argv[0]);
+     fprintf(STDERR,
+       "  reads/writes one PNG file (without -m) or multiple files (-m)\n");
+     fprintf(STDERR,
+       "  with -m %s is used as a temporary file\n", outname);
+     exit(1);
+   }
+
+   if (multiple)
+   {
+      int i;
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+      int allocation_now = current_allocation;
+#endif
+      for (i=2; i<argc; ++i)   /* every argument after the option is an input file */
+      {
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+         int k;
+#endif
+         int kerror;
+         fprintf(STDERR, "Testing %s:",argv[i]);
+         kerror = test_one_file(argv[i], outname);
+         if (kerror == 0)
+         {
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+            fprintf(STDERR, "\n PASS (%lu zero samples)\n",
+               (unsigned long)zero_samples);
+#else
+            fprintf(STDERR, " PASS\n");
+#endif
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+            for (k=0; k<256; k++)
+               if(filters_used[k])
+                  fprintf(STDERR, " Filter %d was used %lu times\n",
+                     k,(unsigned long)filters_used[k]);
+#endif
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+         if(tIME_chunk_present != 0)
+            fprintf(STDERR, " tIME = %s\n",tIME_string);
+         tIME_chunk_present = 0;
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+         }
+         else
+         {
+            fprintf(STDERR, " FAIL\n");
+            ierror += kerror;
+         }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         if (allocation_now != current_allocation)
+            fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
+               current_allocation-allocation_now);
+         if (current_allocation != 0)
+         {
+            memory_infop pinfo = pinformation;
+
+            fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
+               current_allocation);
+            while (pinfo != NULL)
+            {
+               fprintf(STDERR, " %lu bytes at %p\n", (unsigned long)pinfo->size,
+                 (void *) pinfo->pointer);
+               pinfo = pinfo->next;
+            }
+         }
+#endif
+      }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         fprintf(STDERR, " Current memory allocation: %10d bytes\n",
+            current_allocation);
+         fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
+            maximum_allocation);
+         fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
+            total_allocation);
+         fprintf(STDERR, "     Number of allocations: %10d\n",
+            num_allocations);
+#endif
+   }
+   else
+   {
+      int i;
+      for (i=0; i<3; ++i)   /* the single input file is tested three times */
+      {
+         int kerror;
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         int allocation_now = current_allocation;
+#endif
+         if (i == 1) status_dots_requested = 1;
+         else if(verbose == 0)status_dots_requested = 0;
+         if (i == 0 || verbose == 1 || ierror != 0)
+            fprintf(STDERR, "Testing %s:",inname);
+         kerror = test_one_file(inname, outname);
+         if(kerror == 0)
+         {
+            if(verbose == 1 || i == 2)
+            {
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+                int k;
+#endif
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+                fprintf(STDERR, "\n PASS (%lu zero samples)\n",
+                   (unsigned long)zero_samples);
+#else
+                fprintf(STDERR, " PASS\n");
+#endif
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+                for (k=0; k<256; k++)
+                   if(filters_used[k])
+                      fprintf(STDERR, " Filter %d was used %lu times\n",
+                         k,(unsigned long)filters_used[k]);
+#endif
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+             if(tIME_chunk_present != 0)
+                fprintf(STDERR, " tIME = %s\n",tIME_string);
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+            }
+         }
+         else
+         {
+            if(verbose == 0 && i != 2)
+               fprintf(STDERR, "Testing %s:",inname);
+            fprintf(STDERR, " FAIL\n");
+            ierror += kerror;
+         }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         if (allocation_now != current_allocation)
+             fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
+               current_allocation-allocation_now);
+         if (current_allocation != 0)
+         {
+             memory_infop pinfo = pinformation;
+
+             fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
+                current_allocation);
+             while (pinfo != NULL)
+             {
+                fprintf(STDERR," %lu bytes at %p\n",
+                   (unsigned long)pinfo->size, (void *)pinfo->pointer);
+                pinfo = pinfo->next;
+             }
+          }
+#endif
+       }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+       fprintf(STDERR, " Current memory allocation: %10d bytes\n",
+          current_allocation);
+       fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
+          maximum_allocation);
+       fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
+          total_allocation);
+       fprintf(STDERR, "     Number of allocations: %10d\n",
+            num_allocations);
+#endif
+   }
+
+#ifdef PNGTEST_TIMING
+   t_stop = (float)clock();
+   t_misc += (t_stop - t_start);
+   t_start = t_stop;
+   fprintf(STDERR," CPU time used = %.3f seconds",
+      (t_misc+t_decode+t_encode)/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR," (decoding %.3f,\n",
+      t_decode/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR,"        encoding %.3f ,",
+      t_encode/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR," other %.3f seconds)\n\n",
+      t_misc/(float)CLOCKS_PER_SEC);
+#endif
+
+   if (ierror == 0)
+      fprintf(STDERR, "libpng passes test\n");
+   else
+      fprintf(STDERR, "libpng FAILS test\n");
+   return (int)(ierror != 0);   /* collapse the failure count to a 0/1 exit status */
+}
+
+/* Generate a compiler error if there is an old png.h in the search path: version_1_2_29 is presumably declared only by the matching png.h (confirm there). */
+typedef version_1_2_29 your_png_h_is_not_version_1_2_29;
diff --git a/PNG/pngtrans.c b/PNG/pngtrans.c
new file mode 100644
index 0000000..1640095
--- /dev/null
+++ b/PNG/pngtrans.c
@@ -0,0 +1,662 @@
+
+/* pngtrans.c - transforms the data in a row (used by both readers and writers)
+ *
+ * Last changed in libpng 1.2.17 May 15, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Turn on BGR-to-RGB mapping by setting the PNG_BGR transformation flag; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_bgr(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_bgr\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_BGR;
+}
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Turn on 16 bit byte swapping: sets PNG_SWAP_BYTES only when the bit depth is 16; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_swap(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_swap\n");
+
+   if (png_ptr != NULL && png_ptr->bit_depth == 16)
+      png_ptr->transformations |= PNG_SWAP_BYTES;
+}
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Turn on pixel packing: for bit depths below 8, sets PNG_PACK and a user bit depth of 8; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_packing(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_packing\n");
+   if (png_ptr == NULL || png_ptr->bit_depth >= 8)
+      return;
+
+   /* Only images with fewer than 8 bits per sample reach this point. */
+   png_ptr->usr_bit_depth = 8;
+   png_ptr->transformations |= PNG_PACK;
+}
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Turn on packed pixel swapping: sets PNG_PACKSWAP only when the bit depth is below 8; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_packswap(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_packswap\n");
+
+   if (png_ptr != NULL && png_ptr->bit_depth < 8)
+      png_ptr->transformations |= PNG_PACKSWAP;
+}
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Set PNG_SHIFT and copy *true_bits into png_ptr->shift; NULL png_ptr is ignored, true_bits is dereferenced unchecked. */
+void PNGAPI png_set_shift(png_structp png_ptr, png_color_8p true_bits)
+{
+   png_debug(1, "in png_set_shift\n");
+   if (!png_ptr) return;
+   png_ptr->shift = *true_bits;
+   png_ptr->transformations |= PNG_SHIFT;
+}
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Set PNG_INTERLACE for an interlaced image and return 7; return 1 otherwise (including when png_ptr is NULL). */
+int PNGAPI
+png_set_interlace_handling(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_interlace handling\n");
+   /* Nothing to enable for a NULL struct or a non-interlaced image. */
+   if (png_ptr == NULL || !png_ptr->interlaced)
+      return (1);
+
+   png_ptr->transformations |= PNG_INTERLACE;
+   return (7);
+}
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte on read, or remove a filler or alpha byte on write.
+ *
+ * png_ptr    - read/write struct to configure; NULL is ignored.
+ * filler     - filler value; only the low byte is kept (cast to png_byte).
+ *              The type was widened in v0.95 to allow future 2-byte
+ *              fillers for 48-bit input data, and to avoid problems with
+ *              some compilers that don't like bytes as parameters.
+ * filler_loc - PNG_FILLER_AFTER sets PNG_FLAG_FILLER_AFTER; any other value
+ *              clears it.
+ */
+void PNGAPI
+png_set_filler(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+{
+   png_debug(1, "in png_set_filler\n");
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->filler = (png_byte)filler;
+   png_ptr->transformations |= PNG_FILLER;
+
+   if (filler_loc != PNG_FILLER_AFTER)
+      png_ptr->flags &= ~PNG_FLAG_FILLER_AFTER;
+   else
+      png_ptr->flags |= PNG_FLAG_FILLER_AFTER;
+
+   /* The user channel count is adjusted here.  It should probably go in the
+    * "do_read_filler" routine; that was tried in libpng-1.0.1a but caused
+    * problems, so it was restored here in libpng-1.0.2a.  The grayscale case
+    * was also added in libpng-1.0.2a (what happens when we expand a
+    * less-than-8-bit grayscale to GA?). */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      png_ptr->usr_channels = 4;
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY && png_ptr->bit_depth >= 8)
+      png_ptr->usr_channels = 2;
+}
+
+#if !defined(PNG_1_0_X)
+/* Added to libpng-1.2.7: applies png_set_filler() with the same arguments, then sets PNG_ADD_ALPHA; NULL png_ptr is ignored. */
+void PNGAPI
+png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+{
+   png_debug(1, "in png_set_add_alpha\n");
+   if (!png_ptr) return;
+   png_set_filler(png_ptr, filler, filler_loc);
+   png_ptr->transformations |= PNG_ADD_ALPHA;
+}
+#endif
+
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+/* Set the PNG_SWAP_ALPHA transformation flag; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_swap_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_swap_alpha\n");
+   if (png_ptr != NULL) png_ptr->transformations |= PNG_SWAP_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+/* Set the PNG_INVERT_ALPHA transformation flag; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_invert_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_invert_alpha\n");
+   if (png_ptr != NULL) png_ptr->transformations |= PNG_INVERT_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+/* Set the PNG_INVERT_MONO transformation flag; a NULL png_ptr is ignored. */
+void PNGAPI
+png_set_invert_mono(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_invert_mono\n");
+   if (png_ptr != NULL) png_ptr->transformations |= PNG_INVERT_MONO;
+}
+
+/* Invert monochrome grayscale data: complement the gray samples of one row
+ * in place, leaving any alpha samples untouched.
+ *
+ * row_info - describes the row; color_type, bit_depth and rowbytes are read.
+ * row      - the row's bytes (rowbytes of them), modified in place.
+ *
+ * GRAY rows have every byte complemented.  8-bit GRAY_ALPHA rows have the
+ * first byte of each 2-byte pixel complemented, 16-bit GRAY_ALPHA rows the
+ * first two bytes of each 4-byte pixel.  Any other color type or depth is
+ * left unchanged.
+ */
+void /* PRIVATE */
+png_do_invert(png_row_infop row_info, png_bytep row)
+{
+   png_uint_32 pos;
+   png_uint_32 nbytes;
+
+   png_debug(1, "in png_do_invert\n");
+  /* This test removed from libpng version 1.0.13 and 1.2.0:
+   *   if (row_info->bit_depth == 1 &&
+   */
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+     return;
+#endif
+   nbytes = row_info->rowbytes;
+
+   if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      /* Plain grayscale: complement every byte of the row. */
+      for (pos = 0; pos < nbytes; pos++)
+         row[pos] = (png_byte)(~row[pos]);
+   }
+   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+   {
+      if (row_info->bit_depth == 8)
+      {
+         /* Gray byte followed by alpha byte: touch only the gray byte. */
+         for (pos = 0; pos < nbytes; pos += 2)
+            row[pos] = (png_byte)(~row[pos]);
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         /* Two gray bytes then two alpha bytes: touch only the gray pair. */
+         for (pos = 0; pos < nbytes; pos += 4)
+         {
+            row[pos] = (png_byte)(~row[pos]);
+            row[pos + 1] = (png_byte)(~row[pos + 1]);
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* swap the two bytes of every sample in place; 16-bit-deep rows only */
+void /* PRIVATE */
+png_do_swap(png_row_infop row_info, png_bytep row)
+{
+   png_bytep sample;        /* first byte of the current sample */
+   png_uint_32 remaining;   /* samples still to be swapped */
+
+   png_debug(1, "in png_do_swap\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+      return;
+#endif
+   if (row_info->bit_depth != 16)
+      return;
+   /* one two-byte sample per channel per pixel */
+   remaining = row_info->width * row_info->channels;
+   for (sample = row; remaining > 0; remaining--, sample += 2)
+   {
+      png_byte first = sample[0];
+      sample[0] = sample[1];
+      sample[1] = first;
+   }
+}
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+static PNG_CONST png_byte onebppswaptable[256] = { /* b -> bit-reversed b */
+   0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+   0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+   0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+   0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+   0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+   0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+   0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+   0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+   0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+   0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+   0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+   0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+   0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+   0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+   0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+   0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+   0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+   0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+   0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+   0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+   0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+   0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+   0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+   0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+   0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+   0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+   0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+   0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+   0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+   0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+   0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+   0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+
+static PNG_CONST png_byte twobppswaptable[256] = { /* b -> 2-bit fields reversed */
+   0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0,
+   0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
+   0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4,
+   0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
+   0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8,
+   0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
+   0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC,
+   0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
+   0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1,
+   0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
+   0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5,
+   0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
+   0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9,
+   0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
+   0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD,
+   0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
+   0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2,
+   0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
+   0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6,
+   0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
+   0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA,
+   0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
+   0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE,
+   0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
+   0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3,
+   0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
+   0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7,
+   0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
+   0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB,
+   0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
+   0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF,
+   0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
+};
+
+static PNG_CONST png_byte fourbppswaptable[256] = { /* b -> nibbles exchanged */
+   0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+   0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0,
+   0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
+   0x81, 0x91, 0xA1, 0xB1, 0xC1, 0xD1, 0xE1, 0xF1,
+   0x02, 0x12, 0x22, 0x32, 0x42, 0x52, 0x62, 0x72,
+   0x82, 0x92, 0xA2, 0xB2, 0xC2, 0xD2, 0xE2, 0xF2,
+   0x03, 0x13, 0x23, 0x33, 0x43, 0x53, 0x63, 0x73,
+   0x83, 0x93, 0xA3, 0xB3, 0xC3, 0xD3, 0xE3, 0xF3,
+   0x04, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74,
+   0x84, 0x94, 0xA4, 0xB4, 0xC4, 0xD4, 0xE4, 0xF4,
+   0x05, 0x15, 0x25, 0x35, 0x45, 0x55, 0x65, 0x75,
+   0x85, 0x95, 0xA5, 0xB5, 0xC5, 0xD5, 0xE5, 0xF5,
+   0x06, 0x16, 0x26, 0x36, 0x46, 0x56, 0x66, 0x76,
+   0x86, 0x96, 0xA6, 0xB6, 0xC6, 0xD6, 0xE6, 0xF6,
+   0x07, 0x17, 0x27, 0x37, 0x47, 0x57, 0x67, 0x77,
+   0x87, 0x97, 0xA7, 0xB7, 0xC7, 0xD7, 0xE7, 0xF7,
+   0x08, 0x18, 0x28, 0x38, 0x48, 0x58, 0x68, 0x78,
+   0x88, 0x98, 0xA8, 0xB8, 0xC8, 0xD8, 0xE8, 0xF8,
+   0x09, 0x19, 0x29, 0x39, 0x49, 0x59, 0x69, 0x79,
+   0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9, 0xE9, 0xF9,
+   0x0A, 0x1A, 0x2A, 0x3A, 0x4A, 0x5A, 0x6A, 0x7A,
+   0x8A, 0x9A, 0xAA, 0xBA, 0xCA, 0xDA, 0xEA, 0xFA,
+   0x0B, 0x1B, 0x2B, 0x3B, 0x4B, 0x5B, 0x6B, 0x7B,
+   0x8B, 0x9B, 0xAB, 0xBB, 0xCB, 0xDB, 0xEB, 0xFB,
+   0x0C, 0x1C, 0x2C, 0x3C, 0x4C, 0x5C, 0x6C, 0x7C,
+   0x8C, 0x9C, 0xAC, 0xBC, 0xCC, 0xDC, 0xEC, 0xFC,
+   0x0D, 0x1D, 0x2D, 0x3D, 0x4D, 0x5D, 0x6D, 0x7D,
+   0x8D, 0x9D, 0xAD, 0xBD, 0xCD, 0xDD, 0xED, 0xFD,
+   0x0E, 0x1E, 0x2E, 0x3E, 0x4E, 0x5E, 0x6E, 0x7E,
+   0x8E, 0x9E, 0xAE, 0xBE, 0xCE, 0xDE, 0xEE, 0xFE,
+   0x0F, 0x1F, 0x2F, 0x3F, 0x4F, 0x5F, 0x6F, 0x7F,
+   0x8F, 0x9F, 0xAF, 0xBF, 0xCF, 0xDF, 0xEF, 0xFF
+};
+
+/* Reverse the order of the packed pixels inside every byte of a row whose
+ * bit depth is 1, 2 or 4; rows of any other depth are left untouched. */
+void /* PRIVATE */
+png_do_packswap(png_row_infop row_info, png_bytep row)
+{
+   png_bytep lookup, cur, stop;
+
+   png_debug(1, "in png_do_packswap\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+      return;
+#endif
+   switch (row_info->bit_depth)
+   {
+      case 1:  lookup = (png_bytep)onebppswaptable;  break;
+      case 2:  lookup = (png_bytep)twobppswaptable;  break;
+      case 4:  lookup = (png_bytep)fourbppswaptable; break;
+      default: return;
+   }
+
+   stop = row + row_info->rowbytes;
+   cur = row;
+   while (cur < stop)
+   {
+      *cur = lookup[*cur];
+      cur++;
+   }
+}
+#endif /* PNG_READ_PACKSWAP_SUPPORTED or PNG_WRITE_PACKSWAP_SUPPORTED */
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
+    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+/* remove filler or alpha byte(s) in place; row_info is updated to match */
+void /* PRIVATE */
+png_do_strip_filler(png_row_infop row_info, png_bytep row, png_uint_32 flags)
+{
+   png_debug(1, "in png_do_strip_filler\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_bytep sp=row;   /* read position */
+      png_bytep dp=row;   /* write position, never ahead of sp */
+      png_uint_32 row_width=row_info->width;
+      png_uint_32 i;
+
+      if ((row_info->color_type == PNG_COLOR_TYPE_RGB ||
+         (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
+         (flags & PNG_FLAG_STRIP_ALPHA))) &&
+         row_info->channels == 4)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* This converts from RGBX or RGBA to RGB */
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               dp+=3; sp+=4;   /* pixel 0 is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp++;
+               }
+            }
+            /* This converts from XRGB or ARGB to RGB */
+            else
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 24;
+            row_info->rowbytes = row_width * 3;
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               /* This converts from RRGGBBXX or RRGGBBAA to RRGGBB */
+               sp += 8; dp += 6;   /* pixel 0 is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  /* This could be (although png_memcpy is probably slower):
+                  png_memcpy(dp, sp, 6);
+                  sp += 8;
+                  dp += 6;
+                  */
+
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp += 2;
+               }
+            }
+            else
+            {
+               /* This converts from XXRRGGBB or AARRGGBB to RRGGBB */
+               for (i = 0; i < row_width; i++)
+               {
+                  /* This could be (although png_memcpy is probably slower):
+                  png_memcpy(dp, sp + 2, 6);
+                  sp += 8;
+                  dp += 6;
+                  */
+
+                  sp+=2;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 48;
+            row_info->rowbytes = row_width * 6;
+         }
+         row_info->channels = 3;
+      }
+      else if ((row_info->color_type == PNG_COLOR_TYPE_GRAY ||
+         (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
+         (flags & PNG_FLAG_STRIP_ALPHA))) &&
+          row_info->channels == 2)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* This converts from GX or GA to G */
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  sp++;
+               }
+            }
+            /* This converts from XG or AG to G */
+            else
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 8;
+            row_info->rowbytes = row_width;
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               /* This converts from GGXX or GGAA to GG */
+               sp += 4; dp += 2;   /* pixel 0 is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp += 2;
+               }
+            }
+            else
+            {
+               /* This converts from XXGG or AAGG to GG */
+               for (i = 0; i < row_width; i++)
+               {
+                  sp += 2;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+         row_info->channels = 1;
+      }
+      if (flags & PNG_FLAG_STRIP_ALPHA)   /* also when no branch above ran */
+        row_info->color_type &= ~PNG_COLOR_MASK_ALPHA;
+   }
+}
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* swaps red and blue bytes within a pixel
+ *
+ * row_info - describes the row; color_type, bit_depth and width are read
+ *            and nothing is written back to it
+ * row      - the pixel data, modified in place
+ *
+ * Only PNG_COLOR_TYPE_RGB and PNG_COLOR_TYPE_RGB_ALPHA rows with a bit depth
+ * of 8 or 16 are changed; any other row is left exactly as it was.  Green
+ * and alpha samples are never moved.  Exchanging the same two samples again
+ * restores the original order, so applying the routine twice is a no-op.
+ * Nothing is returned and no error is reported.
+ */
+void /* PRIVATE */
+png_do_bgr(png_row_infop row_info, png_bytep row)
+{
+   png_bytep px;            /* first byte of the current pixel */
+   png_uint_32 col;         /* index of the current pixel */
+   png_uint_32 row_width;   /* pixels in the row */
+   int samples;             /* samples per pixel: 3 for RGB, 4 for RGBA */
+
+   png_debug(1, "in png_do_bgr\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+      return;
+#endif
+   /* without the color bit there is no red or blue sample to exchange */
+   if (!(row_info->color_type & PNG_COLOR_MASK_COLOR))
+      return;
+
+   row_width = row_info->width;
+
+   /* The fourth (alpha) sample only changes the distance from one pixel to
+    * the next; red and blue sit at the same offsets in both layouts.
+    */
+   if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+      samples = 3;
+   else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      samples = 4;
+   else
+      return;
+
+   if (row_info->bit_depth == 8)
+   {
+      /* one byte per sample: exchange byte 0 with byte 2 and step over
+       * one whole pixel */
+      px = row;
+      for (col = 0; col < row_width; col++)
+      {
+         png_byte held = px[0];
+
+         px[0] = px[2];
+         px[2] = held;
+         px += samples;
+      }
+   }
+   else if (row_info->bit_depth == 16)
+   {
+      /* two bytes per sample: exchange bytes 0-1 with bytes 4-5, one byte
+       * at a time, so the byte order inside each sample is preserved */
+      px = row;
+      for (col = 0; col < row_width; col++)
+      {
+         png_byte held = px[0];
+
+         px[0] = px[4];
+         px[4] = held;
+
+         held = px[1];
+         px[1] = px[5];
+         px[5] = held;
+         px += 2 * samples;
+      }
+   }
+}
+#endif /* PNG_READ_BGR_SUPPORTED or PNG_WRITE_BGR_SUPPORTED */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+void PNGAPI
+png_set_user_transform_info(png_structp png_ptr, png_voidp
+   user_transform_ptr, int user_transform_depth, int user_transform_channels)
+{
+   png_debug(1, "in png_set_user_transform_info\n");
+   if (png_ptr == NULL) return;   /* no struct to record the info in */
+#if !defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if(user_transform_ptr || user_transform_depth || user_transform_channels)
+      png_warning(png_ptr,
+        "This version of libpng does not support user transform info");
+#else
+   png_ptr->user_transform_channels = (png_byte)user_transform_channels;
+   png_ptr->user_transform_depth = (png_byte)user_transform_depth;
+   png_ptr->user_transform_ptr = user_transform_ptr;
+#endif
+}
+#endif
+
+/* Return the user_transform_ptr stored in png_ptr, or NULL when png_ptr is
+ * NULL or libpng was built without PNG_USER_TRANSFORM_PTR_SUPPORTED.  The
+ * application should free any memory associated with this pointer before
+ * png_write_destroy and png_read_destroy are called; this function only
+ * hands the pointer back.
+ */
+png_voidp PNGAPI
+png_get_user_transform_ptr(png_structp png_ptr)
+{
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if (png_ptr != NULL)
+      return ((png_voidp)png_ptr->user_transform_ptr);
+#endif
+   return (NULL);
+}
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngvcrd.c b/PNG/pngvcrd.c
new file mode 100644
index 0000000..ce4233e
--- /dev/null
+++ b/PNG/pngvcrd.c
@@ -0,0 +1 @@
+/* pngvcrd.c was removed from libpng-1.2.20. */
diff --git a/PNG/pngwio.c b/PNG/pngwio.c
new file mode 100644
index 0000000..371a4fa
--- /dev/null
+++ b/PNG/pngwio.c
@@ -0,0 +1,234 @@
+
+/* pngwio.c - functions for data output
+ *
+ * Last changed in libpng 1.2.13 November 13, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all output.  Users who need
+ * special handling are expected to write functions that have the same
+ * arguments as these and perform similar functions, but that possibly
+ * use different output methods.  Note that you shouldn't change these
+ * functions, but rather write replacement functions and then change
+ * them at run time with png_set_write_fn(...).
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Write the data to whatever output you are using.  The default routine
+   writes to a file pointer.  Note that this routine sometimes gets called
+   with very small lengths, so you should implement some kind of simple
+   buffering if you are using unbuffered writes.  This should never be asked
+   to write more than 64K on a 16 bit machine.  */
+
+void /* PRIVATE */
+png_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   if (png_ptr->write_data_fn == NULL)   /* no writer installed */
+      png_error(png_ptr, "Call to NULL write function");
+   else
+      (png_ptr->write_data_fn)(png_ptr, data, length);
+}
+
+#if !defined(PNG_NO_STDIO)
+/* This is the function that does the actual writing of data.  If you are
+   not writing to a standard C stream, you should create a replacement
+   write_data function and use it at run time with png_set_write_fn(), rather
+   than changing the library. */
+#ifndef USE_FAR_KEYWORD
+void PNGAPI
+png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_uint_32 n_written;   /* bytes the underlying write reported */
+   if (png_ptr == NULL)
+      return;
+#if defined(_WIN32_WCE)
+   if ( !WriteFile((HANDLE)(png_ptr->io_ptr), data, length, &n_written, NULL) )
+      n_written = 0;
+#else
+   n_written = fwrite(data, 1, length, (png_FILE_p)(png_ptr->io_ptr));
+#endif
+   if (n_written != length)   /* short write or outright failure */
+      png_error(png_ptr, "Write Error");
+}
+#else
+/* this is the model-independent version. Since the standard I/O library
+   can't handle far buffers in the medium and small models, we have to copy
+   the data.
+*/
+
+#define NEAR_BUF_SIZE 1024
+#define MIN(a,b) (a <= b ? a : b)
+
+void PNGAPI
+png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_uint_32 total;    /* bytes written so far */
+   png_byte *near_src;   /* Needs to be "png_byte *" instead of "png_bytep" */
+   png_FILE_p out;
+
+   if (png_ptr == NULL) return;
+   near_src = (png_byte *)CVT_PTR_NOCHECK(data);
+   out = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)near_src != data)
+   {
+      /* data is far: stage it through a near buffer, one piece at a time */
+      png_byte staging[NEAR_BUF_SIZE];
+      png_size_t piece, left, done;
+      total = 0;
+      left = length;
+      do
+      {
+         piece = MIN(NEAR_BUF_SIZE, left);
+         png_memcpy(staging, data, piece);
+#if defined(_WIN32_WCE)
+         if ( !WriteFile(out, staging, piece, &done, NULL) )
+            done = 0;
+#else
+         done = fwrite(staging, 1, piece, out);
+#endif
+         if (done != piece)
+            break;   /* total stays short, so the check below reports it */
+         total += done;
+         data += piece;
+         left -= piece;
+      }
+      while (left != 0);
+   }
+   else
+   {
+      /* data really is near: hand it to the stream directly */
+#if defined(_WIN32_WCE)
+      if ( !WriteFile(out, near_src, length, &total, NULL) )
+         total = 0;
+#else
+      total = fwrite(near_src, 1, length, out);
+#endif
+   }
+   if (total != length)
+      png_error(png_ptr, "Write Error");
+}
+
+#endif
+#endif
+
+/* This function is called to output any data pending writing (normally
+   to disk).  After png_flush is called, there should be no data pending
+   writing in any buffers. */
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+void /* PRIVATE */
+png_flush(png_structp png_ptr)
+{
+   if (png_ptr->output_flush_fn != NULL)   /* a flush callback is optional */
+      (png_ptr->output_flush_fn)(png_ptr);
+}
+
+#if !defined(PNG_NO_STDIO)
+void PNGAPI
+png_default_flush(png_structp png_ptr)
+{
+#if defined(_WIN32_WCE)
+   if (png_ptr == NULL) return;   /* nothing is flushed with _WIN32_WCE */
+#else
+   png_FILE_p stream;
+   if (png_ptr == NULL) return;
+   stream = (png_FILE_p)CVT_PTR((png_ptr->io_ptr));
+   if (stream != NULL)   /* skip when no stream is attached */
+      fflush(stream);
+#endif
+}
+#endif
+#endif
+
+/* This function allows the application to supply new output functions for
+   libpng if standard C streams aren't being used.
+
+   This function takes as its arguments:
+   png_ptr       - pointer to a png output data structure
+   io_ptr        - pointer to user supplied structure containing info about
+                   the output functions.  May be NULL.
+   write_data_fn - pointer to a new output function that takes as its
+                   arguments a pointer to a png_struct, a pointer to
+                   data to be written, and a 32-bit unsigned int that is
+                   the number of bytes to be written.  The new write
+                   function should call png_error(png_ptr, "Error msg")
+                   to exit and output any fatal error messages.
+   output_flush_fn - pointer to a new flush function that takes as its
+                   arguments a pointer to a png_struct.  After a call to
+                   the flush function, there should be no data in any buffers
+                   or pending transmission.  If the output method doesn't do
+                   any buffering of output, a function prototype must still be
+                   supplied although it doesn't have to do anything.  If
+                   PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile
+                   time, output_flush_fn will be ignored, although it must be
+                   supplied for compatibility. */
+void PNGAPI
+png_set_write_fn(png_structp png_ptr, png_voidp io_ptr,
+   png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)
+{
+   if (png_ptr == NULL)
+      return;
+
+   /* With stdio available a NULL callback selects the built-in default;
+    * with PNG_NO_STDIO the callbacks are stored exactly as given.
+    */
+#if !defined(PNG_NO_STDIO)
+   if (write_data_fn == NULL)
+      write_data_fn = png_default_write_data;
+#  if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   if (output_flush_fn == NULL)
+      output_flush_fn = png_default_flush;
+#  endif
+#endif
+
+   /* install the user pointer and the (possibly defaulted) callbacks */
+   png_ptr->io_ptr = io_ptr;
+   png_ptr->write_data_fn = write_data_fn;
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_ptr->output_flush_fn = output_flush_fn;
+#endif
+
+   /* It is an error to read while writing a png file, so a read callback
+    * left in the structure is dropped with a two-part warning. */
+   if (png_ptr->read_data_fn != NULL)
+   {
+      png_ptr->read_data_fn = NULL;
+      png_warning(png_ptr,
+         "Attempted to set both read_data_fn and write_data_fn in");
+      png_warning(png_ptr,
+         "the same structure.  Resetting read_data_fn to NULL.");
+   }
+}
+
+#if defined(USE_FAR_KEYWORD)
+#if defined(_MSC_VER)
+/* keep only the offset of ptr; with check != 0 a lost segment is an error */
+void *png_far_to_near(png_structp png_ptr,png_voidp ptr, int check)
+{
+   void *offset_only;
+   void FAR *widened;
+   FP_OFF(offset_only) = FP_OFF(ptr);
+   widened = (void FAR *)offset_only;
+   if (check != 0 && FP_SEG(ptr) != FP_SEG(widened))
+      png_error(png_ptr,"segment lost in conversion");
+   return(offset_only);
+}
+#  else
+/* narrow ptr to a near pointer; with check != 0 a changed value is an error */
+void *png_far_to_near(png_structp png_ptr,png_voidp ptr, int check)
+{
+   void *narrowed;
+   void FAR *widened;
+   narrowed = (void FAR *)ptr;
+   widened = (void FAR *)narrowed;
+   if (check != 0 && widened != ptr)
+      png_error(png_ptr,"segment lost in conversion");
+   return(narrowed);
+}
+#   endif
+#   endif
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngwrite.c b/PNG/pngwrite.c
new file mode 100644
index 0000000..7d02ad7
--- /dev/null
+++ b/PNG/pngwrite.c
@@ -0,0 +1,1532 @@
+
+/* pngwrite.c - general routines to write a PNG file
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+/* get internal access to png.h */
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Writes all the PNG information.  This is the suggested way to use the
+ * library.  If you have a new chunk to add, make a function to write it,
+ * and put it in the correct location here.  If you want the chunk written
+ * after the image data, put it in png_write_end().  I strongly encourage
+ * you to supply a PNG_INFO_ flag, and check info_ptr->valid before writing
+ * the chunk, as that will keep the code from breaking if you want to just
+ * write a plain PNG file.  If you have long comments, I suggest writing
+ * them in png_write_end(), and compressing them.
+ */
+void PNGAPI
+png_write_info_before_PLTE(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_write_info_before_PLTE\n");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* the flag set at the very end turns any repeat call into a no-op */
+   if (png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE)
+      return;
+   png_write_sig(png_ptr); /* write PNG signature */
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) &&
+       (png_ptr->mng_features_permitted))
+   {
+      png_warning(png_ptr,"MNG features are not allowed in a PNG datastream");
+      png_ptr->mng_features_permitted = 0;
+   }
+#endif
+   /* write IHDR information. */
+   png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height,
+      info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type,
+      info_ptr->filter_type,
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+      info_ptr->interlace_type);
+#else
+      0);
+#endif
+   /* The known chunks below are each written only when their flag is set
+      in info_ptr->valid. */
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_gAMA)
+   {
+#  if defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_write_gAMA(png_ptr, info_ptr->gamma);
+#  elif defined(PNG_FIXED_POINT_SUPPORTED)
+      png_write_gAMA_fixed(png_ptr, info_ptr->int_gamma);
+#  endif
+   }
+#endif
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sRGB)
+      png_write_sRGB(png_ptr, (int)info_ptr->srgb_intent);
+#endif
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_iCCP)
+      png_write_iCCP(png_ptr, info_ptr->iccp_name, PNG_COMPRESSION_TYPE_BASE,
+                     info_ptr->iccp_profile, (int)info_ptr->iccp_proflen);
+#endif
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sBIT)
+      png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
+#endif
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_cHRM)
+   {
+#  if defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_write_cHRM(png_ptr,
+         info_ptr->x_white, info_ptr->y_white,
+         info_ptr->x_red, info_ptr->y_red,
+         info_ptr->x_green, info_ptr->y_green,
+         info_ptr->x_blue, info_ptr->y_blue);
+#  elif defined(PNG_FIXED_POINT_SUPPORTED)
+      png_write_cHRM_fixed(png_ptr,
+         info_ptr->int_x_white, info_ptr->int_y_white,
+         info_ptr->int_x_red, info_ptr->int_y_red,
+         info_ptr->int_x_green, info_ptr->int_y_green,
+         info_ptr->int_x_blue, info_ptr->int_y_blue);
+#  endif
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+      png_unknown_chunk *chunk;
+
+      png_debug(5, "writing extra chunks\n");
+
+      for (chunk = info_ptr->unknown_chunks;
+           chunk < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+           chunk++)
+      {
+         int keep = png_handle_as_unknown(png_ptr, chunk->name);
+
+         /* skip: never handled, no location, or marked PLTE/IDAT or later */
+         if (keep == PNG_HANDLE_CHUNK_NEVER || !chunk->location ||
+             (chunk->location & PNG_HAVE_PLTE) ||
+             (chunk->location & PNG_HAVE_IDAT))
+            continue;
+         /* skip: bit 0x20 of the last name byte clear and no override */
+         if (!(chunk->name[3] & 0x20) && keep != PNG_HANDLE_CHUNK_ALWAYS &&
+             !(png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS))
+            continue;
+         if (chunk->size == 0)
+            png_warning(png_ptr, "Writing zero-length unknown chunk");
+         png_write_chunk(png_ptr, chunk->name, chunk->data, chunk->size);
+      }
+   }
+#endif
+   png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
+}
+
+void PNGAPI
+png_write_info(png_structp png_ptr, png_infop info_ptr)
+{
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+   int i;
+#endif
+
+   png_debug(1, "in png_write_info\n");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_write_info_before_PLTE(png_ptr, info_ptr);
+
+   if (info_ptr->valid & PNG_INFO_PLTE)
+      png_write_PLTE(png_ptr, info_ptr->palette,
+         (png_uint_32)info_ptr->num_palette);
+   else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      png_error(png_ptr, "Valid palette required for paletted images");
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_tRNS)
+      {
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+         /* invert the alpha channel (in tRNS) */
+         if ((png_ptr->transformations & PNG_INVERT_ALPHA) &&
+            info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         {
+            int j;
+            for (j=0; j<(int)info_ptr->num_trans; j++)
+               info_ptr->trans[j] = (png_byte)(255 - info_ptr->trans[j]);
+         }
+#endif
+      png_write_tRNS(png_ptr, info_ptr->trans, &(info_ptr->trans_values),
+         info_ptr->num_trans, info_ptr->color_type);
+      }
+#endif
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_bKGD)
+      png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type);
+#endif
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_hIST)
+      png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette);
+#endif
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_oFFs)
+      png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset,
+         info_ptr->offset_unit_type);
+#endif
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_pCAL)
+      png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0,
+         info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams,
+         info_ptr->pcal_units, info_ptr->pcal_params);
+#endif
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sCAL)
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+      png_write_sCAL(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_pixel_width, info_ptr->scal_pixel_height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_s_width, info_ptr->scal_s_height);
+#else
+      png_warning(png_ptr,
+          "png_write_sCAL not supported; sCAL chunk not written.");
+#endif
+#endif
+#endif
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_pHYs)
+      png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit,
+         info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type);
+#endif
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_tIME)
+   {
+      png_write_tIME(png_ptr, &(info_ptr->mod_time));
+      png_ptr->mode |= PNG_WROTE_tIME;
+   }
+#endif
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sPLT)
+     for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+       png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
+#endif
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+   /* Check to see if we need to write text chunks */
+   for (i = 0; i < info_ptr->num_text; i++)
+   {
+      png_debug2(2, "Writing header text chunk %d, type %d\n", i,
+         info_ptr->text[i].compression);
+      /* an internationalized chunk? */
+      if (info_ptr->text[i].compression > 0)
+      {
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+          /* write international chunk */
+          png_write_iTXt(png_ptr,
+                         info_ptr->text[i].compression,
+                         info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
+                         info_ptr->text[i].text);
+#else
+          png_warning(png_ptr, "Unable to write international text");
+#endif
+          /* Mark this chunk as written */
+          info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+      }
+      /* If we want a compressed text chunk */
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
+      {
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+         /* write compressed chunk */
+         png_write_zTXt(png_ptr, info_ptr->text[i].key,
+            info_ptr->text[i].text, 0,
+            info_ptr->text[i].compression);
+#else
+         png_warning(png_ptr, "Unable to write compressed text");
+#endif
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+      }
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+      {
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+         /* write uncompressed chunk */
+         png_write_tEXt(png_ptr, info_ptr->text[i].key,
+                         info_ptr->text[i].text,
+                         0);
+#else
+         png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+      }
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         int keep=png_handle_as_unknown(png_ptr, up->name);
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+            up->location && (up->location & PNG_HAVE_PLTE) &&
+            !(up->location & PNG_HAVE_IDAT) &&
+            ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
+}
+
+/* Writes the trailing chunks and IEND; raises png_error if no IDAT was
+ * written.  With a non-NULL info_ptr it first writes tIME (unless the
+ * PNG_WROTE_tIME mode bit is set), the text entries whose compression field
+ * still names an unwritten type, and unknown chunks located after IDAT.
+ */
+void PNGAPI
+png_write_end(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_write_end\n");
+   if (png_ptr == NULL)
+      return;
+   if (!(png_ptr->mode & PNG_HAVE_IDAT))
+      png_error(png_ptr, "No IDATs written into file");
+
+   /* a NULL info_ptr means: write no information chunks, only IEND */
+   if (info_ptr != NULL)
+   {
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+      int i; /* index into info_ptr->text */
+#endif
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+      /* write the user-supplied time chunk unless it was already written */
+      if ((info_ptr->valid & PNG_INFO_tIME) &&
+         !(png_ptr->mode & PNG_WROTE_tIME))
+         png_write_tIME(png_ptr, &(info_ptr->mod_time));
+#endif
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+      /* loop through comment chunks */
+      for (i = 0; i < info_ptr->num_text; i++)
+      {
+         png_debug2(2, "Writing trailer text chunk %d, type %d\n", i,
+            info_ptr->text[i].compression);
+         /* a positive compression value selects an iTXt chunk */
+         if (info_ptr->text[i].compression > 0)
+         {
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+             /* write international chunk */
+             png_write_iTXt(png_ptr,
+                         info_ptr->text[i].compression,
+                         info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
+                         info_ptr->text[i].text);
+#else
+             png_warning(png_ptr, "Unable to write international text");
+#endif
+             /* Mark as written (also done when only the warning was issued) */
+             info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         }
+         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
+         {
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+            /* write compressed chunk */
+            png_write_zTXt(png_ptr, info_ptr->text[i].key,
+               info_ptr->text[i].text, 0,
+               info_ptr->text[i].compression);
+#else
+            png_warning(png_ptr, "Unable to write compressed text");
+#endif
+            /* Mark as written (also done when only the warning was issued) */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+         }
+         else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+         {
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+            /* write uncompressed chunk */
+            png_write_tEXt(png_ptr, info_ptr->text[i].key,
+               info_ptr->text[i].text, 0);
+#else
+            png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+
+            /* Mark as written (also done when only the warning was issued) */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         }
+      }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up; /* walks info_ptr->unknown_chunks */
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         int keep=png_handle_as_unknown(png_ptr, up->name);
+         /* Write the chunk only if it is not set to "never", its location
+          * has PNG_AFTER_IDAT, and bit 0x20 of name[3] is set or the chunk
+          * is "always" kept or PNG_FLAG_KEEP_UNSAFE_CHUNKS is on. */
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+            up->location && (up->location & PNG_AFTER_IDAT) &&
+            ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
+   }
+
+   png_ptr->mode |= PNG_AFTER_IDAT;
+
+   /* write end of PNG file */
+   png_write_IEND(png_ptr);
+}
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+#if !defined(_WIN32_WCE)
+/* "time.h" functions are not supported on WindowsCE */
+void PNGAPI
+png_convert_from_struct_tm(png_timep ptime, struct tm FAR * ttime)
+{  /* copy a broken-down struct tm into *ptime, one field at a time */
+   png_debug(1, "in png_convert_from_struct_tm\n");
+   ptime->second = (png_byte)ttime->tm_sec;
+   ptime->minute = (png_byte)ttime->tm_min;
+   ptime->hour = (png_byte)ttime->tm_hour;
+   ptime->day = (png_byte)ttime->tm_mday;
+   ptime->month = (png_byte)(1 + ttime->tm_mon); /* tm_mon is 0-based */
+   ptime->year = (png_uint_16)(ttime->tm_year + 1900); /* years since 1900 */
+}
+
+void PNGAPI
+png_convert_from_time_t(png_timep ptime, time_t ttime)
+{  /* Convert ttime to UTC with gmtime() and store the fields in *ptime. */
+   struct tm *tbuf = gmtime(&ttime);
+   png_debug(1, "in png_convert_from_time_t\n");
+   if (tbuf == NULL) /* gmtime() failed: leave an all-zero png_time */
+      { png_memset(ptime, 0, png_sizeof(png_time)); return; }
+   png_convert_from_struct_tm(ptime, tbuf);
+}
+#endif
+#endif
+
+/* Create and initialize a png_struct for writing; NULL is returned on failure */
+png_structp PNGAPI
+png_create_write_struct(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_write_struct_2(user_png_ver, error_ptr, error_fn,
+      warn_fn, png_voidp_NULL, png_malloc_ptr_NULL, png_free_ptr_NULL));
+}
+
+/* Same, with user memory callbacks; the body below serves both builds */
+png_structp PNGAPI
+png_create_write_struct_2(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_structp png_ptr;
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   jmp_buf jmpbuf;
+#endif
+#endif
+   int i;
+   png_debug(1, "in png_create_write_struct\n");
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
+      (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr);
+#else
+   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+#endif /* PNG_USER_MEM_SUPPORTED */
+   if (png_ptr == NULL)
+      return (NULL);
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_ptr->jmpbuf))
+#endif
+   {  /* entered via longjmp when png_error() fires during the setup below */
+      png_free(png_ptr, png_ptr->zbuf);
+      png_ptr->zbuf=NULL;
+      png_destroy_struct(png_ptr); /* NOTE(review): free_fn unused - confirm */
+      return (NULL);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#endif
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
+
+   if(user_png_ver)
+   {
+     i=0; /* stop at the first NUL of either string so neither is overread */
+     do
+     {
+       if(user_png_ver[i] != png_libpng_ver[i])
+          png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+     } while (png_libpng_ver[i] && user_png_ver[i++]);
+   }
+
+   if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH)
+   {
+     /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so
+      * we must recompile any applications that use any older library version.
+      * For versions after libpng 1.0, we will be compatible, so we need
+      * only check the first digit.
+      */
+     if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] ||
+         (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) ||
+         (user_png_ver[0] == '0' && user_png_ver[2] < '9'))
+     {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+        char msg[80];
+        if (user_png_ver)
+        {
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
+             user_png_ver);
+          png_warning(png_ptr, msg);
+        }
+        png_snprintf(msg, 80,
+           "Application  is  running with png.c from libpng-%.20s",
+           png_libpng_ver);
+        png_warning(png_ptr, msg);
+#endif
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+        png_ptr->flags=0;
+#endif
+        png_error(png_ptr,
+           "Incompatible libpng version in application and library");
+     }
+   }
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+      (png_uint_32)png_ptr->zbuf_size);
+
+   png_set_write_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL,
+      png_flush_ptr_NULL);
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_set_filter_heuristics(png_ptr, PNG_FILTER_HEURISTIC_DEFAULT,
+      1, png_doublep_NULL, png_doublep_NULL);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* Applications that neglect to set up their own setjmp() and then encounter
+   a png_error() will longjmp here.  Since the jmpbuf is then meaningless we
+   abort instead of returning. */
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+      PNG_ABORT();
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#else
+   if (setjmp(png_ptr->jmpbuf))
+      PNG_ABORT();
+#endif
+#endif
+   return (png_ptr);
+}
+
+/* Initialize png_ptr structure, and allocate any memory needed */
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Deprecated. */
+#undef png_write_init
+void PNGAPI
+png_write_init(png_structp png_ptr)
+{  /* Reached only by applications compiled against png.h before 1.0.7 */
+   png_write_init_2(png_ptr, "1.0.6 or earlier",
+      (png_size_t)0, (png_size_t)0);
+}
+
+void PNGAPI
+png_write_init_2(png_structp png_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size, png_size_t png_info_size)
+{
+   /* Only pre-1.0.12-compiled applications get here; check their sizes */
+   if(png_ptr == NULL) return;
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+   if(png_sizeof(png_struct) > png_struct_size ||
+      png_sizeof(png_info) > png_info_size)
+   {
+      char msg[80]; /* scratch buffer for the two version warnings */
+      png_ptr->warning_fn=NULL; /* clear the warning callback first */
+      if (user_png_ver)
+      {
+        png_snprintf(msg, 80,
+           "Application was compiled with png.h from libpng-%.20s",
+           user_png_ver);
+        png_warning(png_ptr, msg);
+      }
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
+         png_libpng_ver);
+      png_warning(png_ptr, msg);
+   }
+#endif
+   if(png_sizeof(png_struct) > png_struct_size)
+     {
+       png_ptr->error_fn=NULL; /* clear the error callback first */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+       png_ptr->flags=0;
+#endif
+       png_error(png_ptr,
+       "The png struct allocated by the application for writing is too small.");
+     }
+   if(png_sizeof(png_info) > png_info_size)
+     {
+       png_ptr->error_fn=NULL; /* clear the error callback first */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+       png_ptr->flags=0;
+#endif
+       png_error(png_ptr,
+       "The info struct allocated by the application for writing is too small.");
+     }
+   png_write_init_3(&png_ptr, user_png_ver, png_struct_size);
+}
+#endif /* PNG_1_0_X || PNG_1_2_X */
+
+
+void PNGAPI
+png_write_init_3(png_structpp ptr_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size)
+{  /* Re-initialize *ptr_ptr for writing (deprecated png_write_init path) */
+   png_structp png_ptr=*ptr_ptr;
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp; /* to save current jump buffer */
+#endif
+   /* a struct smaller than the library's png_struct is replaced below */
+   int i = 0;
+
+   if (png_ptr == NULL)
+      return;
+   /* compare versions; stop at either NUL, skip a NULL user string */
+   for (; user_png_ver != NULL; i++)
+   {
+     if (user_png_ver[i] != png_libpng_ver[i])
+     {
+#ifdef PNG_LEGACY_SUPPORTED
+       png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+#else
+       png_ptr->warning_fn=NULL;
+       png_warning(png_ptr,
+     "Application uses deprecated png_write_init() and should be recompiled.");
+       break;
+#endif
+     }
+     if (user_png_ver[i] == '\0' || png_libpng_ver[i] == '\0') break;
+   }
+   png_debug(1, "in png_write_init_3\n");
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* save jump buffer and error functions */
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   if (png_sizeof(png_struct) > png_struct_size)
+     {
+       png_destroy_struct(png_ptr);
+       png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+       *ptr_ptr = png_ptr;
+       if (png_ptr == NULL) return; /* allocation failed, *ptr_ptr is NULL */
+     }
+   /* reset all variables to 0 */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* restore jump buffer */
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+   png_set_write_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL,
+      png_flush_ptr_NULL);
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+      (png_uint_32)png_ptr->zbuf_size);
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_set_filter_heuristics(png_ptr, PNG_FILTER_HEURISTIC_DEFAULT,
+      1, png_doublep_NULL, png_doublep_NULL);
+#endif
+}
+
+/* Write num_rows rows, taken in order from the pointer array `row`, by
+ * calling png_write_row() on each.  For an interlaced image the caller
+ * must either supply the 7 sub images itself or, after calling
+ * png_set_interlace_handling(), "write" the whole image seven times.
+ */
+void PNGAPI
+png_write_rows(png_structp png_ptr, png_bytepp row,
+   png_uint_32 num_rows)
+{
+   png_uint_32 remaining; /* rows still to be written */
+   png_bytepp next_row = row; /* next entry of the caller's row array */
+
+   png_debug(1, "in png_write_rows\n");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* hand the rows to png_write_row one at a time, in array order */
+   for (remaining = num_rows; remaining > 0; remaining--)
+   {
+      png_write_row(png_ptr, *next_row++);
+   }
+}
+
+/* Write a whole image from an array of png_ptr->height row pointers.  One
+ * call is enough even for an interlaced image: all passes are done here.
+ */
+void PNGAPI
+png_write_image(png_structp png_ptr, png_bytepp image)
+{
+   png_uint_32 y; /* index of the row being written */
+   int passes_left; /* interlace passes still to be written */
+
+   /* without a write struct there is nothing to do */
+   if (png_ptr == NULL)
+      return;
+
+   png_debug(1, "in png_write_image\n");
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* Initialize interlace handling; the call returns how many times the
+      whole image must be submitted (1 when it is not interlaced) */
+   passes_left = png_set_interlace_handling(png_ptr);
+#else
+   passes_left = 1;
+#endif
+   /* submit every row of the image once per pass */
+   while (passes_left-- > 0)
+   {
+      /* rows are taken from the array in order, lowest index first */
+      for (y = 0; y < png_ptr->height; y++)
+      {
+         png_write_row(png_ptr, image[y]);
+      }
+   }
+}
+
+/* Called by the user to write one row of image data taken from `row` */
+void PNGAPI
+png_write_row(png_structp png_ptr, png_bytep row)
+{
+   if (png_ptr == NULL)
+      return;
+   png_debug2(1, "in png_write_row (row %ld, pass %d)\n",
+      png_ptr->row_number, png_ptr->pass);
+
+   /* first row of the first pass: one-time checks, then start the row state */
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+   /* make sure we wrote the header info */
+   if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
+      png_error(png_ptr,
+         "png_write_info was never called before png_write_row.");
+
+   /* warn about transforms that were requested but compiled out for writing */
+#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_warning(png_ptr, "PNG_WRITE_PACKSWAP_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined.");
+#endif
+
+      png_write_start_row(png_ptr);
+   }
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* if interlaced and this row is not part of the current pass, skip it */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      switch (png_ptr->pass)
+      {
+         case 0: /* uses rows 0, 8, 16, ... */
+            if (png_ptr->row_number & 0x07)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 1: /* same rows as pass 0; nothing at all if width < 5 */
+            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 2: /* uses rows 4, 12, 20, ... */
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 3: /* uses rows 0, 4, 8, ...; nothing at all if width < 3 */
+            if ((png_ptr->row_number & 0x03) || png_ptr->width < 3)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 4: /* uses rows 2, 6, 10, ... */
+            if ((png_ptr->row_number & 0x03) != 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 5: /* uses even rows; nothing at all if width < 2 */
+            if ((png_ptr->row_number & 0x01) || png_ptr->width < 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 6: /* uses odd rows */
+            if (!(png_ptr->row_number & 0x01))
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+      }
+   }
+#endif
+
+   /* describe the user's row (usr_* values) for the transformation code */
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->usr_width;
+   png_ptr->row_info.channels = png_ptr->usr_channels;
+   png_ptr->row_info.bit_depth = png_ptr->usr_bit_depth;
+   png_ptr->row_info.pixel_depth = (png_byte)(png_ptr->row_info.bit_depth *
+      png_ptr->row_info.channels);
+
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+      png_ptr->row_info.width);
+
+   png_debug1(3, "row_info->color_type = %d\n", png_ptr->row_info.color_type);
+   png_debug1(3, "row_info->width = %lu\n", png_ptr->row_info.width);
+   png_debug1(3, "row_info->channels = %d\n", png_ptr->row_info.channels);
+   png_debug1(3, "row_info->bit_depth = %d\n", png_ptr->row_info.bit_depth);
+   png_debug1(3, "row_info->pixel_depth = %d\n", png_ptr->row_info.pixel_depth);
+   png_debug1(3, "row_info->rowbytes = %lu\n", png_ptr->row_info.rowbytes);
+
+   /* Copy user's row into buffer, leaving room for filter byte. */
+   png_memcpy_check(png_ptr, png_ptr->row_buf + 1, row,
+      png_ptr->row_info.rowbytes);
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* handle interlacing (not applied on pass 6) */
+   if (png_ptr->interlaced && png_ptr->pass < 6 &&
+      (png_ptr->transformations & PNG_INTERLACE))
+   {
+      png_do_write_interlace(&(png_ptr->row_info),
+         png_ptr->row_buf + 1, png_ptr->pass);
+      /* this should always get caught above, but still ... */
+      if (!(png_ptr->row_info.width))
+      {
+         png_write_finish_row(png_ptr);
+         return;
+      }
+   }
+#endif
+
+   /* handle other transformations */
+   if (png_ptr->transformations)
+      png_do_write_transformations(png_ptr);
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_write_intrapixel(&(png_ptr->row_info), png_ptr->row_buf + 1);
+   }
+#endif
+
+   /* Find a filter if necessary, filter the row and write it out. */
+   png_write_find_filter(png_ptr, &(png_ptr->row_info));
+
+   /* report the row to the application's callback, if one is installed */
+   if (png_ptr->write_row_fn != NULL)
+      (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+}
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+/* Store nrows as the automatic flush interval; 0 turns flushing off */
+void PNGAPI
+png_set_flush(png_structp png_ptr, int nrows)
+{
+   png_debug(1, "in png_set_flush\n");
+   /* a negative interval is stored as 0 */
+   if (png_ptr != NULL)
+      png_ptr->flush_dist = (nrows > 0 ? nrows : 0);
+}
+
+/* Flush pending compressed data to the output as IDAT chunk(s) now */
+void PNGAPI
+png_write_flush(png_structp png_ptr)
+{
+   png_debug(1, "in png_write_flush\n");
+   if (png_ptr == NULL)
+      return;
+   /* every row has been written already, so there is nothing to flush */
+   if (png_ptr->row_number >= png_ptr->num_rows)
+     return;
+
+   /* Run deflate with Z_SYNC_FLUSH until a call leaves room in zbuf;
+    * each time zbuf fills up completely it is written out as an IDAT
+    * chunk and handed back to zlib empty.
+    */
+   for (;;)
+   {
+      int zret = deflate(&png_ptr->zstream, Z_SYNC_FLUSH);
+
+      /* anything but Z_OK is reported through png_error */
+      if (zret != Z_OK)
+      {
+         if (png_ptr->zstream.msg == NULL)
+            png_error(png_ptr, "zlib error");
+         else
+            png_error(png_ptr, png_ptr->zstream.msg);
+      }
+
+      /* room is left in the buffer: zlib has nothing more to emit */
+      if (png_ptr->zstream.avail_out)
+         break;
+
+      /* buffer is full: write the IDAT and reset the zlib output buffer */
+      png_write_IDAT(png_ptr, png_ptr->zbuf,
+                     png_ptr->zbuf_size);
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   }
+
+   /* If there is any data left to be output, write it into a new IDAT */
+   if (png_ptr->zstream.avail_out != png_ptr->zbuf_size)
+   {
+      /* only the used part of zbuf goes out; then zbuf is reset */
+      png_write_IDAT(png_ptr, png_ptr->zbuf,
+                     png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   }
+
+   /* clear the flush row count and flush the output */
+   png_ptr->flush_rows = 0;
+   png_flush(png_ptr);
+}
+#endif /* PNG_WRITE_FLUSH_SUPPORTED */
+
+/* Free the write and info structs that exist; NULL the caller's pointers */
+void PNGAPI
+png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
+{
+   png_structp png_ptr = NULL;
+   png_infop info_ptr = NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn = NULL;
+   png_voidp mem_ptr = NULL;
+#endif
+
+   png_debug(1, "in png_destroy_write_struct\n");
+   /* *png_ptr_ptr may itself be NULL, so the struct must not be touched
+    * until png_ptr has been checked; the user memory callbacks are read
+    * below, behind that check.
+    */
+   if (png_ptr_ptr != NULL)
+   {
+      png_ptr = *png_ptr_ptr;
+   }
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr != NULL)
+   {
+      free_fn = png_ptr->free_fn;
+      mem_ptr = png_ptr->mem_ptr;
+   }
+#endif
+
+   if (info_ptr_ptr != NULL)
+      info_ptr = *info_ptr_ptr;
+
+   if (info_ptr != NULL)
+   {
+      /* data owned by the info struct can only be freed through png_ptr */
+      if (png_ptr != NULL)
+      {
+        png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+        if (png_ptr->num_chunk_list)
+        {
+           png_free(png_ptr, png_ptr->chunk_list);
+           png_ptr->chunk_list=NULL;
+           png_ptr->num_chunk_list=0;
+        }
+#endif
+      }
+
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn,
+         (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)info_ptr);
+#endif
+      *info_ptr_ptr = NULL;
+   }
+
+   if (png_ptr != NULL)
+   {
+      png_write_destroy(png_ptr);
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn,
+         (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)png_ptr);
+#endif
+      *png_ptr_ptr = NULL;
+   }
+}
+
+
+/* Release everything the write struct owns, then reset the struct itself */
+void /* PRIVATE */
+png_write_destroy(png_structp png_ptr)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf saved_jmpbuf; /* jump buffer kept across the reset */
+#endif
+   png_voidp saved_error_ptr;
+   png_error_ptr saved_error_fn;
+   png_error_ptr saved_warning_fn;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr saved_free_fn;
+#endif
+
+   png_debug(1, "in png_write_destroy\n");
+   /* let zlib release its own state first */
+   deflateEnd(&png_ptr->zstream);
+
+   /* now our buffers; png_free accepts NULL pointers */
+   png_free(png_ptr, png_ptr->zbuf);
+   png_free(png_ptr, png_ptr->row_buf);
+#ifndef PNG_NO_WRITE_FILTERING
+   png_free(png_ptr, png_ptr->prev_row);
+   png_free(png_ptr, png_ptr->sub_row);
+   png_free(png_ptr, png_ptr->up_row);
+   png_free(png_ptr, png_ptr->avg_row);
+   png_free(png_ptr, png_ptr->paeth_row);
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_free(png_ptr, png_ptr->time_buffer);
+#endif
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_free(png_ptr, png_ptr->prev_filters);
+   png_free(png_ptr, png_ptr->filter_weights);
+   png_free(png_ptr, png_ptr->inv_filter_weights);
+   png_free(png_ptr, png_ptr->filter_costs);
+   png_free(png_ptr, png_ptr->inv_filter_costs);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* remember the fields that must survive the wipe */
+   png_memcpy(saved_jmpbuf, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   saved_error_ptr = png_ptr->error_ptr;
+   saved_error_fn = png_ptr->error_fn;
+   saved_warning_fn = png_ptr->warning_fn;
+#ifdef PNG_USER_MEM_SUPPORTED
+   saved_free_fn = png_ptr->free_fn;
+#endif
+
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+   png_ptr->error_ptr = saved_error_ptr;
+   png_ptr->error_fn = saved_error_fn;
+   png_ptr->warning_fn = saved_warning_fn;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr->free_fn = saved_free_fn;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(png_ptr->jmpbuf, saved_jmpbuf, png_sizeof (jmp_buf));
+#endif
+}
+
+/* Select the row filter(s) to use; `method` must be PNG_FILTER_TYPE_BASE */
+void PNGAPI
+png_set_filter(png_structp png_ptr, int method, int filters)
+{
+   png_debug(1, "in png_set_filter\n");
+   if (png_ptr == NULL)
+      return;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (method == PNG_INTRAPIXEL_DIFFERENCING))
+         method = PNG_FILTER_TYPE_BASE; /* treated like the base method here */
+#endif
+   if (method == PNG_FILTER_TYPE_BASE)
+   {
+      switch (filters & (PNG_ALL_FILTERS | 0x07))
+      {
+#ifndef PNG_NO_WRITE_FILTER
+         case 5:
+         case 6: /* 5..7 warn, then fall through to the "none" filter */
+         case 7: png_warning(png_ptr, "Unknown row filter for method 0");
+#endif /* PNG_NO_WRITE_FILTER */
+         case PNG_FILTER_VALUE_NONE:
+              png_ptr->do_filter=PNG_FILTER_NONE; break;
+#ifndef PNG_NO_WRITE_FILTER
+         case PNG_FILTER_VALUE_SUB:
+              png_ptr->do_filter=PNG_FILTER_SUB; break;
+         case PNG_FILTER_VALUE_UP:
+              png_ptr->do_filter=PNG_FILTER_UP; break;
+         case PNG_FILTER_VALUE_AVG:
+              png_ptr->do_filter=PNG_FILTER_AVG; break;
+         case PNG_FILTER_VALUE_PAETH:
+              png_ptr->do_filter=PNG_FILTER_PAETH; break;
+         default: png_ptr->do_filter = (png_byte)filters; break;
+#else
+         default: png_warning(png_ptr, "Unknown row filter for method 0");
+#endif /* PNG_NO_WRITE_FILTER */
+      }
+
+      /* If we have allocated the row_buf, this means we have already started
+       * with the image and we should have allocated all of the filter buffers
+       * that have been selected.  If prev_row isn't already allocated, then
+       * it is too late to start using the filters that need it, since we
+       * will be missing the data in the previous row.  If an application
+       * wants to start and stop using particular filters during compression,
+       * it should start out with all of the filters, and then add and
+       * remove them after the start of compression.
+       */
+      if (png_ptr->row_buf != NULL)
+      {
+#ifndef PNG_NO_WRITE_FILTER
+         /* Sub does not depend on prev_row, so its buffer can always be added */
+         if ((png_ptr->do_filter & PNG_FILTER_SUB) && png_ptr->sub_row == NULL)
+         {
+            png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
+              (png_ptr->rowbytes + 1));
+            png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB;
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_UP) && png_ptr->up_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Up filter after starting");
+               png_ptr->do_filter &= ~PNG_FILTER_UP;
+            }
+            else
+            {
+               png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
+            }
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_AVG) && png_ptr->avg_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Average filter after starting");
+               png_ptr->do_filter &= ~PNG_FILTER_AVG;
+            }
+            else
+            {
+               png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
+            }
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_PAETH) &&
+             png_ptr->paeth_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Paeth filter after starting");
+               png_ptr->do_filter &= (png_byte)(~PNG_FILTER_PAETH);
+            }
+            else
+            {
+               png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
+            }
+         }
+
+         if (png_ptr->do_filter == PNG_NO_FILTERS) /* every filter was dropped */
+#endif /* PNG_NO_WRITE_FILTER */
+            png_ptr->do_filter = PNG_FILTER_NONE; /* unconditional w/o filters */
+      }
+   }
+   else
+      png_error(png_ptr, "Unknown custom filter method");
+}
+
+/* This allows us to influence the way in which libpng chooses the "best"
+ * filter for the current scanline.  While the "minimum sum of absolute
+ * differences" metric is relatively fast and effective, there is some
+ * question as to whether it can be improved upon by trying to keep the
+ * filtered data going to zlib more consistent, hopefully resulting in
+ * better compression.
+ */
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)      /* GRR 970116 */
+void PNGAPI
+png_set_filter_heuristics(png_structp png_ptr, int heuristic_method,
+   int num_weights, png_doublep filter_weights,
+   png_doublep filter_costs)
+{
+   int i;
+   int old_num_weights;
+
+   png_debug(1, "in png_set_filter_heuristics\n");
+   if (png_ptr == NULL)
+      return;
+   if (heuristic_method >= PNG_FILTER_HEURISTIC_LAST)
+   {
+      png_warning(png_ptr, "Unknown filter heuristic method");
+      return;
+   }
+
+   if (heuristic_method == PNG_FILTER_HEURISTIC_DEFAULT)
+      heuristic_method = PNG_FILTER_HEURISTIC_UNWEIGHTED;
+
+   if (num_weights < 0 || filter_weights == NULL ||
+      heuristic_method == PNG_FILTER_HEURISTIC_UNWEIGHTED)
+      num_weights = 0;
+
+   old_num_weights = png_ptr->num_prev_filters;
+   png_ptr->num_prev_filters = (png_byte)num_weights;
+   png_ptr->heuristic_method = (png_byte)heuristic_method;
+
+   if (num_weights > 0)
+   {
+      /* Buffers kept from an earlier call may be smaller than this call
+       * needs; release them so they are reallocated at the new size
+       * instead of being written past their end.
+       */
+      if (num_weights > old_num_weights)
+      {
+         png_free(png_ptr, png_ptr->prev_filters);
+         png_free(png_ptr, png_ptr->filter_weights);
+         png_free(png_ptr, png_ptr->inv_filter_weights);
+         png_ptr->prev_filters = NULL;
+         png_ptr->filter_weights = NULL;
+         png_ptr->inv_filter_weights = NULL;
+      }
+
+      if (png_ptr->prev_filters == NULL)
+      {
+         png_ptr->prev_filters = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(png_sizeof(png_byte) * num_weights));
+
+         /* To make sure that the weighting starts out fairly */
+         for (i = 0; i < num_weights; i++)
+            png_ptr->prev_filters[i] = 255;
+      }
+
+      if (png_ptr->filter_weights == NULL)
+      {
+         png_ptr->filter_weights = (png_uint_16p)png_malloc(png_ptr,
+            (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
+
+         png_ptr->inv_filter_weights = (png_uint_16p)png_malloc(png_ptr,
+            (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
+         for (i = 0; i < num_weights; i++)
+            png_ptr->inv_filter_weights[i] =
+            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
+      }
+
+      for (i = 0; i < num_weights; i++)
+      {
+         /* A zero weight would divide by zero below; treat it like a
+          * negative weight and use the default factor.
+          */
+         if (filter_weights[i] <= 0.0)
+            png_ptr->inv_filter_weights[i] =
+            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
+         else
+         {
+            png_ptr->inv_filter_weights[i] =
+               (png_uint_16)((double)PNG_WEIGHT_FACTOR*filter_weights[i]+0.5);
+            png_ptr->filter_weights[i] =
+               (png_uint_16)((double)PNG_WEIGHT_FACTOR/filter_weights[i]+0.5);
+         }
+      }
+   }
+
+   /* With other filter methods this would be based on png_ptr->filter. */
+   if (png_ptr->filter_costs == NULL)
+   {
+      png_ptr->filter_costs = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
+
+      png_ptr->inv_filter_costs = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
+      for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
+         png_ptr->inv_filter_costs[i] =
+         png_ptr->filter_costs[i] = PNG_COST_FACTOR;
+   }
+
+   /* Set the relative cost of each filter.  A NULL array or a negative
+    * entry selects the default factor; an entry in [0.0, 1.0) leaves the
+    * stored cost unchanged.  The costs could also take the compression
+    * level into account (e.g. Paeth is relatively dearer at low levels).
+    */
+   for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
+   {
+      if (filter_costs == NULL || filter_costs[i] < 0.0)
+         png_ptr->inv_filter_costs[i] =
+         png_ptr->filter_costs[i] = PNG_COST_FACTOR;
+      else if (filter_costs[i] >= 1.0)
+      {
+         png_ptr->inv_filter_costs[i] =
+            (png_uint_16)((double)PNG_COST_FACTOR / filter_costs[i] + 0.5);
+         png_ptr->filter_costs[i] =
+            (png_uint_16)((double)PNG_COST_FACTOR * filter_costs[i] + 0.5);
+      }
+   }
+}
+#endif /* PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
+
+void PNGAPI
+png_set_compression_level(png_structp png_ptr, int level)
+{
+   png_debug(1, "in png_set_compression_level\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->zlib_level = level;                   /* store the request */
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_LEVEL;  /* flag it as custom */
+}
+
+void PNGAPI
+png_set_compression_mem_level(png_structp png_ptr, int mem_level)
+{
+   png_debug(1, "in png_set_compression_mem_level\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->zlib_mem_level = mem_level;               /* store the request */
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL;  /* flag it as custom */
+}
+
+void PNGAPI
+png_set_compression_strategy(png_structp png_ptr, int strategy)
+{
+   png_debug(1, "in png_set_compression_strategy\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->zlib_strategy = strategy;                /* store the request */
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY;  /* flag it as custom */
+}
+
+void PNGAPI
+png_set_compression_window_bits(png_structp png_ptr, int window_bits)
+{
+   if (png_ptr == NULL)
+      return;
+   if (window_bits < 8)
+      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
+   else if (window_bits > 15)
+      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
+#ifndef WBITS_8_OK
+   /* avoid libpng bug with 256-byte windows: use a 512-byte window */
+   if (window_bits == 8)
+   {
+      png_warning(png_ptr, "Compression window is being reset to 512");
+      window_bits = 9;
+   }
+#endif
+   png_ptr->zlib_window_bits = window_bits;
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS;
+}
+
+void PNGAPI
+png_set_compression_method(png_structp png_ptr, int method)
+{
+   png_debug(1, "in png_set_compression_method\n");
+   if (png_ptr == NULL)
+      return;
+   if (method != 8)   /* warn only; the value is stored regardless */
+      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
+   png_ptr->zlib_method = method;
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_METHOD;
+}
+
+void PNGAPI
+png_set_write_status_fn(png_structp png_ptr, png_write_status_ptr write_row_fn)
+{
+   /* Install the row-status callback; a NULL png_ptr is ignored. */
+   if (png_ptr != NULL)
+      png_ptr->write_row_fn = write_row_fn;
+}
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+void PNGAPI
+png_set_write_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
+   write_user_transform_fn)
+{
+   png_debug(1, "in png_set_write_user_transform_fn\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->write_user_transform_fn = write_user_transform_fn;
+   png_ptr->transformations |= PNG_USER_TRANSFORM;   /* enable the hook */
+}
+#endif
+
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+void PNGAPI /* one-call write: header info, image rows, then the end */
+png_write_png(png_structp png_ptr, png_infop info_ptr,
+              int transforms, voidp params)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+   /* Invert the alpha channel from opacity to transparency; this is the
+    * only transform requested before png_write_info() is called. */
+   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
+       png_set_invert_alpha(png_ptr);
+#endif
+   /* Write the file header information. */
+   png_write_info(png_ptr, info_ptr);
+
+   /* ------ these transformations don't touch the info structure ------- */
+
+#if defined(PNG_WRITE_INVERT_SUPPORTED)
+   /* Invert monochrome pixels */
+   if (transforms & PNG_TRANSFORM_INVERT_MONO)
+       png_set_invert_mono(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+   /* Shift the pixels up to a legal bit depth and fill in
+    * as appropriate to correctly scale the image.  Only done when
+    * info_ptr carries a valid sBIT entry. */
+   if ((transforms & PNG_TRANSFORM_SHIFT)
+               && (info_ptr->valid & PNG_INFO_sBIT))
+       png_set_shift(png_ptr, &info_ptr->sig_bit);
+#endif
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+   /* Pack pixels into bytes */
+   if (transforms & PNG_TRANSFORM_PACKING)
+       png_set_packing(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+   /* Swap location of alpha bytes from ARGB to RGBA */
+   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
+       png_set_swap_alpha(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED)
+   /* Get rid of filler (OR ALPHA) bytes, pack XRGB/RGBX/ARGB/RGBA into
+    * RGB (4 channels -> 3 channels). The second parameter is not used.
+    */
+   if (transforms & PNG_TRANSFORM_STRIP_FILLER)
+       png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE);
+#endif
+
+#if defined(PNG_WRITE_BGR_SUPPORTED)
+   /* Flip BGR pixels to RGB */
+   if (transforms & PNG_TRANSFORM_BGR)
+       png_set_bgr(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SWAP_SUPPORTED)
+   /* Swap bytes of 16-bit files to most significant byte first */
+   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
+       png_set_swap(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+   /* Swap bits of 1, 2, 4 bit packed pixel formats */
+   if (transforms & PNG_TRANSFORM_PACKSWAP)
+       png_set_packswap(png_ptr);
+#endif
+
+   /* ----------------------- end of transformations ------------------- */
+
+   /* Write the image rows, but only if info_ptr says it holds them */
+   if (info_ptr->valid & PNG_INFO_IDAT)
+       png_write_image(png_ptr, info_ptr->row_pointers);
+
+   /* It is REQUIRED to call this to finish writing the rest of the file */
+   png_write_end(png_ptr, info_ptr);
+
+   transforms = transforms; /* self-assignments quiet compiler warnings */
+   params = params;         /* params is not otherwise used here */
+}
+#endif
+#endif /* PNG_WRITE_SUPPORTED */
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngwtran.c b/PNG/pngwtran.c
new file mode 100644
index 0000000..0372fe6
--- /dev/null
+++ b/PNG/pngwtran.c
@@ -0,0 +1,572 @@
+
+/* pngwtran.c - transforms the data in a row for PNG writers
+ *
+ * Last changed in libpng 1.2.9 April 14, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Apply each enabled write transformation to the current row, in place on
+ * png_ptr->row_buf + 1.  The order of transformations is significant.
+ */
+void /* PRIVATE */
+png_do_write_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_do_write_transformations\n");
+
+   if (png_ptr == NULL)
+      return;
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   if (png_ptr->transformations & PNG_USER_TRANSFORM)
+      if(png_ptr->write_user_transform_fn != NULL)
+        (*(png_ptr->write_user_transform_fn)) /* user write transform function */
+          (png_ptr,                    /* png_ptr */
+           &(png_ptr->row_info),       /* row_info:     */
+             /*  png_uint_32 width;          width of row */
+             /*  png_uint_32 rowbytes;       number of bytes in row */
+             /*  png_byte color_type;        color type of pixels */
+             /*  png_byte bit_depth;         bit depth of samples */
+             /*  png_byte channels;          number of channels (1-4) */
+             /*  png_byte pixel_depth;       bits per pixel (depth*channels) */
+           png_ptr->row_buf + 1);      /* start of pixel data for row */
+#endif
+#if defined(PNG_WRITE_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)   /* strip filler bytes */
+      png_do_strip_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->flags);
+#endif
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_do_packswap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)   /* pack to the true depth */
+      png_do_pack(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         (png_uint_32)png_ptr->bit_depth);
+#endif
+#if defined(PNG_WRITE_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_do_swap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)   /* scale by png_ptr->shift */
+      png_do_shift(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->shift));
+#endif
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_ALPHA)
+      png_do_write_swap_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_ALPHA)
+      png_do_write_invert_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_do_bgr(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_do_invert(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+}
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+/* Pack one-pixel-per-byte data in place, first pixel in the high bits.
+ * Pass the true bit depth (1, 2 or 4) in bit_depth.  Only rows with bit
+ * depth 8 and 1 channel are packed; row_info is updated to match.
+ */
+void /* PRIVATE */
+png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
+{
+   png_debug(1, "in png_do_pack\n");
+   if (row_info->bit_depth == 8 &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL && /* row_info already used above */
+#endif
+      row_info->channels == 1)
+   {
+      switch ((int)bit_depth)
+      {
+         case 1:
+         {
+            png_bytep sp, dp;   /* read and write cursors over the same row */
+            int mask, v;        /* current bit, and the byte being built */
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            mask = 0x80;   /* start at the most significant bit */
+            v = 0;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (*sp != 0)   /* any nonzero source byte sets the bit */
+                  v |= mask;
+               sp++;
+               if (mask > 1)
+                  mask >>= 1;
+               else
+               {
+                  /* eight pixels collected: emit the byte and start over */
+                  mask = 0x80;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+            }
+            if (mask != 0x80)   /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp, dp;   /* read and write cursors over the same row */
+            int shift, v;       /* bit position, and the byte being built */
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            shift = 6;   /* first pixel goes into the top two bits */
+            v = 0;
+            for (i = 0; i < row_width; i++)
+            {
+               png_byte value;
+
+               value = (png_byte)(*sp & 0x03);   /* keep the low two bits */
+               v |= (value << shift);
+               if (shift == 0)
+               {
+                  /* four pixels collected: emit the byte and start over */
+                  shift = 6;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+               else
+                  shift -= 2;
+               sp++;
+            }
+            if (shift != 6)   /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp, dp;   /* read and write cursors over the same row */
+            int shift, v;       /* bit position, and the byte being built */
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            shift = 4;   /* first pixel goes into the high nibble */
+            v = 0;
+            for (i = 0; i < row_width; i++)
+            {
+               png_byte value;
+
+               value = (png_byte)(*sp & 0x0f);   /* keep the low four bits */
+               v |= (value << shift);
+
+               if (shift == 0)
+               {
+                  /* two pixels collected: emit the byte and start over */
+                  shift = 4;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+               else
+                  shift -= 4;
+
+               sp++;
+            }
+            if (shift != 4)   /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+      }
+      /* Updated for every bit_depth value, including ones not packed above */
+      row_info->bit_depth = (png_byte)bit_depth;
+      row_info->pixel_depth = (png_byte)(bit_depth * row_info->channels);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+         row_info->width);
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Scale samples that use fewer significant bits than the row's bit depth
+ * up to the full range, in place, by replicating their bit pattern.  Pass
+ * the true bits per channel in bit_depth: for a bit depth 4 row holding
+ * values 0 to 7, pass 3 and the data becomes 0 to 15.  Palette rows are
+ * skipped.  NOTE(review): a zero entry in bit_depth makes the loop step
+ * zero, so the loops below would not terminate - confirm callers check.
+ */
+void /* PRIVATE */
+png_do_shift(png_row_infop row_info, png_bytep row, png_color_8p bit_depth)
+{
+   png_debug(1, "in png_do_shift\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL &&
+#else
+   if (
+#endif
+      row_info->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      /* Per channel: the first left shift, and the step between shifts */
+      int shift_start[4], shift_dec[4];
+      int channels = 0;
+
+      if (row_info->color_type & PNG_COLOR_MASK_COLOR)
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->red;
+         shift_dec[channels] = bit_depth->red;
+         channels++;
+         shift_start[channels] = row_info->bit_depth - bit_depth->green;
+         shift_dec[channels] = bit_depth->green;
+         channels++;
+         shift_start[channels] = row_info->bit_depth - bit_depth->blue;
+         shift_dec[channels] = bit_depth->blue;
+         channels++;
+      }
+      else
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->gray;
+         shift_dec[channels] = bit_depth->gray;
+         channels++;
+      }
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->alpha;
+         shift_dec[channels] = bit_depth->alpha;
+         channels++;
+      }
+
+      /* With low row depths, could only be grayscale, so one channel */
+      if (row_info->bit_depth < 8)
+      {
+         png_bytep bp = row;
+         png_uint_32 i;
+         png_byte mask;   /* applied to right-shifted copies of the byte */
+         png_uint_32 row_bytes = row_info->rowbytes;
+
+         if (bit_depth->gray == 1 && row_info->bit_depth == 2)
+            mask = 0x55;
+         else if (row_info->bit_depth == 4 && bit_depth->gray == 3)
+            mask = 0x11;
+         else
+            mask = 0xff;
+
+         /* Whole packed bytes are processed at once */
+         for (i = 0; i < row_bytes; i++, bp++)
+         {
+            png_uint_16 v;
+            int j;
+
+            v = *bp;
+            *bp = 0;
+            for (j = shift_start[0]; j > -shift_dec[0]; j -= shift_dec[0])
+            {
+               if (j > 0)
+                  *bp |= (png_byte)((v << j) & 0xff);
+               else
+                  *bp |= (png_byte)((v >> (-j)) & mask);
+            }
+         }
+      }
+      else if (row_info->bit_depth == 8)
+      {
+         png_bytep bp = row;
+         png_uint_32 i;
+         png_uint_32 istop = channels * row_info->width;   /* sample count */
+
+         for (i = 0; i < istop; i++, bp++)
+         {
+
+            png_uint_16 v;
+            int j;
+            int c = (int)(i%channels);   /* channel of this sample */
+
+            v = *bp;
+            *bp = 0;
+            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
+            {
+               if (j > 0)
+                  *bp |= (png_byte)((v << j) & 0xff);
+               else
+                  *bp |= (png_byte)((v >> (-j)) & 0xff);
+            }
+         }
+      }
+      else
+      {
+         /* Any remaining bit depth is handled as two bytes per sample */
+         png_bytep bp;
+         png_uint_32 i;
+         png_uint_32 istop = channels * row_info->width;   /* sample count */
+
+         for (bp = row, i = 0; i < istop; i++)
+         {
+            int c = (int)(i%channels);   /* channel of this sample */
+            png_uint_16 value, v;
+            int j;
+            /* Read the sample most significant byte first */
+            v = (png_uint_16)(((png_uint_16)(*bp) << 8) + *(bp + 1));
+            value = 0;
+            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
+            {
+               if (j > 0)
+                  value |= (png_uint_16)((v << j) & (png_uint_16)0xffff);
+               else
+                  value |= (png_uint_16)((v >> (-j)) & (png_uint_16)0xffff);
+            }
+            *bp++ = (png_byte)(value >> 8);
+            *bp++ = (png_byte)(value & 0xff);
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+/* Move a leading alpha sample (AG/ARGB) to the end of every pixel. */
+void /* PRIVATE */
+png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_swap_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* ARGB -> RGBA: rotate each 4-byte pixel left by one byte */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep px = row;
+            png_uint_32 remaining = row_info->width;
+            while (remaining-- > 0)
+            {
+               png_byte alpha = px[0];
+               px[0] = px[1];
+               px[1] = px[2];
+               px[2] = px[3];
+               px[3] = alpha;
+               px += 4;
+            }
+         }
+         /* AARRGGBB -> RRGGBBAA */
+         else
+         {
+            png_bytep px = row;
+            png_uint_32 remaining = row_info->width;
+            /* rotate each 8-byte pixel left by two bytes */
+            while (remaining-- > 0)
+            {
+               png_byte alpha_hi = px[0];
+               png_byte alpha_lo = px[1];
+               px[0] = px[2];
+               px[1] = px[3];
+               px[2] = px[4];
+               px[3] = px[5];
+               px[4] = px[6];
+               px[5] = px[7];
+               px[6] = alpha_hi;
+               px[7] = alpha_lo;
+               px += 8;
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* AG -> GA: exchange the two bytes of each pixel */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep px = row;
+            png_uint_32 remaining = row_info->width;
+
+            while (remaining-- > 0)
+            {
+               png_byte alpha = px[0];
+               px[0] = px[1];
+               px[1] = alpha;
+               px += 2;
+            }
+         }
+         /* AAGG -> GGAA */
+         else
+         {
+            png_bytep px = row;
+            png_uint_32 remaining = row_info->width;
+
+            /* rotate each 4-byte pixel left by two bytes */
+            while (remaining-- > 0)
+            {
+               png_byte alpha_hi = px[0];
+               png_byte alpha_lo = px[1];
+               px[0] = px[2];
+               px[1] = px[3];
+               px[2] = alpha_hi;
+               px[3] = alpha_lo;
+               px += 4;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+/* Invert the alpha channel of a row before it is written: every alpha
+ * byte a becomes 255 - a, which for 16-bit samples is applied to both
+ * bytes.  Only RGB_ALPHA and GRAY_ALPHA rows are changed; the color or
+ * gray bytes of each pixel are never modified.  The row is updated in
+ * place.
+ *
+ * Any bit depth other than 8 is treated as 16 bits per sample.
+ */
+void /* PRIVATE */
+png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_invert_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* RGBA, one byte per sample: the alpha sample is the last byte
+          * of each 4-byte pixel.
+          */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep px = row;
+            png_uint_32 pixel;
+            png_uint_32 pixels = row_info->width;
+
+            for (pixel = 0; pixel < pixels; pixel++)
+            {
+               px[3] = (png_byte)(255 - px[3]);
+               px += 4;
+            }
+         }
+         /* RRGGBBAA, two bytes per sample: the alpha sample is the last
+          * two bytes of each 8-byte pixel.
+          */
+         else
+         {
+            png_bytep px = row;
+            png_uint_32 pixel;
+            png_uint_32 pixels = row_info->width;
+
+            for (pixel = 0; pixel < pixels; pixel++)
+            {
+               px[6] = (png_byte)(255 - px[6]);
+               px[7] = (png_byte)(255 - px[7]);
+               px += 8;
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* GA, one byte per sample: the alpha sample is the second byte
+          * of each 2-byte pixel.
+          */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep px = row;
+            png_uint_32 pixel;
+            png_uint_32 pixels = row_info->width;
+
+            for (pixel = 0; pixel < pixels; pixel++)
+            {
+               px[1] = (png_byte)(255 - px[1]);
+               px += 2;
+            }
+         }
+         /* GGAA, two bytes per sample: the alpha sample is the last two
+          * bytes of each 4-byte pixel.
+          */
+         else
+         {
+            png_bytep px = row;
+            png_uint_32 pixel;
+            png_uint_32 pixels = row_info->width;
+
+            for (pixel = 0; pixel < pixels; pixel++)
+            {
+               px[2] = (png_byte)(255 - px[2]);
+               px[3] = (png_byte)(255 - px[3]);
+               px += 4;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* Apply intrapixel differencing to an RGB or RGBA row in place: the red
+ * and blue samples of each pixel are replaced by their difference from
+ * the green sample (modulo 256 for 8-bit, modulo 65536 for 16-bit
+ * samples).  Green and alpha are left unchanged.
+ *
+ * Rows whose color type is not RGB or RGB_ALPHA, or whose bit depth is
+ * neither 8 nor 16, are not modified.
+ */
+void /* PRIVATE */
+png_do_write_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_intrapixel\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      png_bytep px = row;
+      png_uint_32 remaining = row_info->width;
+      int samples;
+
+      /* Samples per pixel; color types other than RGB/RGBA are skipped */
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+         samples = 3;
+      else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+         samples = 4;
+      else
+         return;
+
+      if (row_info->bit_depth == 8)
+      {
+         /* One byte per sample: R at 0, G at 1, B at 2 */
+         for (; remaining > 0; remaining--, px += samples)
+         {
+            px[0] = (png_byte)((px[0] - px[1]) & 0xff);
+            px[2] = (png_byte)((px[2] - px[1]) & 0xff);
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         /* Two bytes per sample, most significant byte first */
+         for (; remaining > 0; remaining--, px += 2 * samples)
+         {
+            png_uint_32 r    = (px[0] << 8) | px[1];
+            png_uint_32 g    = (px[2] << 8) | px[3];
+            png_uint_32 b    = (px[4] << 8) | px[5];
+            png_uint_32 red  = (png_uint_32)((r - g) & 0xffffL);
+            png_uint_32 blue = (png_uint_32)((b - g) & 0xffffL);
+
+            px[0] = (png_byte)((red >> 8) & 0xff);
+            px[1] = (png_byte)(red & 0xff);
+            px[4] = (png_byte)((blue >> 8) & 0xff);
+            px[5] = (png_byte)(blue & 0xff);
+         }
+      }
+   }
+}
+#endif /* PNG_MNG_FEATURES_SUPPORTED */
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/PNG/pngwutil.c b/PNG/pngwutil.c
new file mode 100644
index 0000000..0774080
--- /dev/null
+++ b/PNG/pngwutil.c
@@ -0,0 +1,2802 @@
+
+/* pngwutil.c - utilities to write a PNG file
+ *
+ * Last changed in libpng 1.2.27 [April 29, 2008]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2008 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Store the 32-bit unsigned value i into buf[0..3], most significant
+ * byte first (PNG byte order).  The caller must supply at least four
+ * writable bytes.
+ */
+void PNGAPI
+png_save_uint_32(png_bytep buf, png_uint_32 i)
+{
+   int shift;
+
+   for (shift = 24; shift >= 0; shift -= 8)
+      *buf++ = (png_byte)((i >> shift) & 0xff);
+}
+
+/* Store the signed 32-bit value i into buf[0..3] in PNG byte order.
+ * This relies on i being held in two's complement form; any other
+ * representation would need an explicit conversion here.
+ */
+void PNGAPI
+png_save_int_32(png_bytep buf, png_int_32 i)
+{
+   int k;
+
+   for (k = 0; k < 4; k++)
+      buf[k] = (png_byte)((i >> (24 - 8 * k)) & 0xff);
+}
+
+/* Store the low 16 bits of i into buf[0..1], high byte first (PNG byte
+ * order).  The argument is an unsigned int rather than png_uint_16
+ * just to stay friendly to pre-ANSI C compilers.
+ */
+void PNGAPI
+png_save_uint_16(png_bytep buf, unsigned int i)
+{
+   *buf = (png_byte)((i >> 8) & 0xff);
+   *(buf + 1) = (png_byte)(i & 0xff);
+}
+
+/* Emit one complete PNG chunk in a single call: header, data and CRC.
+ * chunk_name points at the chunk type; only its first 4 bytes are used,
+ * so it need not be null terminated (prefer the pre-defined name arrays).
+ * length is the number of bytes at data, all of which must be available
+ * now; otherwise call png_write_chunk_start(), png_write_chunk_data() and
+ * png_write_chunk_end() directly.  A NULL png_ptr makes this a no-op.
+ */
+void PNGAPI
+png_write_chunk(png_structp png_ptr, png_bytep chunk_name,
+   png_bytep data, png_size_t length)
+{
+   if (png_ptr != NULL)
+   {
+      png_write_chunk_start(png_ptr, chunk_name, (png_uint_32)length);
+      png_write_chunk_data(png_ptr, data, length);
+      png_write_chunk_end(png_ptr);
+   }
+}
+
+/* Begin a PNG chunk: emit the 4-byte length (PNG byte order) followed by
+ * the 4-byte chunk type, and restart the running CRC so that it covers
+ * the type.  length must equal the total passed to png_write_chunk_data().
+ */
+void PNGAPI
+png_write_chunk_start(png_structp png_ptr, png_bytep chunk_name,
+   png_uint_32 length)
+{
+   png_byte length_bytes[4];
+   png_debug2(0, "Writing %s chunk (%lu bytes)\n", chunk_name, length);
+   if (png_ptr == NULL)
+      return;
+
+   /* chunk length, in PNG byte order */
+   png_save_uint_32(length_bytes, length);
+   png_write_data(png_ptr, length_bytes, (png_size_t)4);
+
+   /* chunk type; the CRC restarts here and takes in the type bytes */
+   png_write_data(png_ptr, chunk_name, (png_size_t)4);
+   png_reset_crc(png_ptr);
+   png_calculate_crc(png_ptr, chunk_name, (png_size_t)4);
+}
+
+/* Append length bytes from data to the chunk opened by
+ * png_write_chunk_start(), folding them into the running CRC first.
+ * May be called repeatedly; the lengths across all calls *must* total
+ * the length announced to png_write_chunk_start().
+ */
+void PNGAPI
+png_write_chunk_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   /* nothing to do without a write struct or without any payload */
+   if (png_ptr == NULL || data == NULL || length == 0)
+      return;
+
+   /* CRC first, then the bytes themselves */
+   png_calculate_crc(png_ptr, data, length);
+   png_write_data(png_ptr, data, length);
+}
+
+/* Close the current chunk by writing its CRC (4 bytes, PNG byte order). */
+void PNGAPI
+png_write_chunk_end(png_structp png_ptr)
+{
+   png_byte crc_bytes[4];
+
+   if (png_ptr != NULL)
+   {
+      /* serialise the running CRC held in png_ptr->crc */
+      png_save_uint_32(crc_bytes, png_ptr->crc);
+      png_write_data(png_ptr, crc_bytes, (png_size_t)4);
+   }
+}
+
+/* Write whatever part of the 8-byte PNG signature is still outstanding.
+ * png_ptr->sig_bytes counts the signature bytes treated as already
+ * written (see png_set_sig_bytes(), e.g. when the PNG stream is embedded
+ * in another stream), so only the remaining bytes are emitted here.
+ * PNG_HAVE_PNG_SIGNATURE is recorded when fewer than 3 were skipped.
+ */
+void /* PRIVATE */
+png_write_sig(png_structp png_ptr)
+{
+   png_byte signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+   png_size_t remaining = (png_size_t)8 - png_ptr->sig_bytes;
+
+   png_write_data(png_ptr, &signature[png_ptr->sig_bytes], remaining);
+   if (png_ptr->sig_bytes < 3)
+      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+}
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_iCCP_SUPPORTED)
+/*
+ * This pair of functions encapsulates the operation of (a) compressing a
+ * text string, and (b) issuing it later as a series of chunk data writes.
+ * The compression_state structure is shared context for these functions
+ * set up by the caller in order to make the whole mess thread-safe.
+ */
+
+typedef struct
+{
+    char *input;   /* text passed through uncompressed, otherwise NULL */
+    int input_len;   /* byte count of input (0 when input is NULL) */
+    int num_output_ptr; /* number of output_ptr entries currently in use */
+    int max_output_ptr; /* number of entries allocated for output_ptr */
+    png_charpp output_ptr; /* array of saved, zbuf_size-byte output buffers */
+} compression_state;
+
+/* Compress text (text_len bytes) for a later chunk write, recording the
+ * result in *comp.  With PNG_TEXT_COMPRESSION_NONE nothing is copied:
+ * comp->input just points at text and text_len is returned.  Otherwise
+ * the data goes through deflate() on png_ptr->zstream; each time zbuf
+ * fills up, a zbuf_size copy of it is appended to comp->output_ptr[], and
+ * the final partial buffer is left in png_ptr->zbuf.  Returns the total
+ * output size in bytes, cast to int.  zlib failures go to png_error().
+ */
+static int /* PRIVATE */
+png_text_compress(png_structp png_ptr,
+        png_charp text, png_size_t text_len, int compression,
+        compression_state *comp)
+{
+   int ret;
+
+   comp->num_output_ptr = 0;
+   comp->max_output_ptr = 0;
+   comp->output_ptr = NULL;
+   comp->input = NULL;
+   comp->input_len = 0;
+
+   /* we may just want to pass the text right through */
+   if (compression == PNG_TEXT_COMPRESSION_NONE)
+   {
+       comp->input = text;
+       comp->input_len = text_len;
+       return((int)text_len);
+   }
+
+   if (compression >= PNG_TEXT_COMPRESSION_LAST)
+   {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char msg[50];
+      png_snprintf(msg, 50, "Unknown compression type %d", compression);
+      png_warning(png_ptr, msg);
+#else
+      png_warning(png_ptr, "Unknown compression type");
+#endif
+   }
+
+   /* We can't write the chunk until we know how much data there is, so
+    * the compressor has to run first with its output saved.  Most
+    * comments are small, but an array of malloc'd buffers is kept to be
+    * sure.  (For a well behaved application this could be simplified to
+    * a single buffer of (1001 * text_len / 1000) + 12 bytes, unless only
+    * 64K can be malloc'd or the input is incredibly large.)
+    */
+
+   /* set up the compression buffers */
+   png_ptr->zstream.avail_in = (uInt)text_len;
+   png_ptr->zstream.next_in = (Bytef *)text;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   png_ptr->zstream.next_out = (Bytef *)png_ptr->zbuf;
+
+   /* this is the same compression loop as in png_write_row() */
+   do
+   {
+      /* compress the data */
+      ret = deflate(&png_ptr->zstream, Z_NO_FLUSH);
+      if (ret != Z_OK)
+      {
+         /* error */
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+      /* check to see if we need more room */
+      if (!(png_ptr->zstream.avail_out))
+      {
+         /* make sure the output array has room */
+         if (comp->num_output_ptr >= comp->max_output_ptr)
+         {
+            int old_max;
+
+            old_max = comp->max_output_ptr;
+            comp->max_output_ptr = comp->num_output_ptr + 4;
+            if (comp->output_ptr != NULL)
+            {
+               png_charpp old_ptr;
+
+               old_ptr = comp->output_ptr;
+               comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                  (png_uint_32)(comp->max_output_ptr *
+                  png_sizeof (png_charpp)));
+               png_memcpy(comp->output_ptr, old_ptr, old_max
+                  * png_sizeof (png_charp));
+               png_free(png_ptr, old_ptr);
+            }
+            else
+               comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                  (png_uint_32)(comp->max_output_ptr *
+                  png_sizeof (png_charp)));
+         }
+
+         /* save the data */
+         comp->output_ptr[comp->num_output_ptr] = (png_charp)png_malloc(png_ptr,
+            (png_uint_32)png_ptr->zbuf_size);
+         png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
+            png_ptr->zbuf_size);
+         comp->num_output_ptr++;
+
+         /* and reset the buffer */
+         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+         png_ptr->zstream.next_out = png_ptr->zbuf;
+      }
+   /* continue until we don't have any more to compress */
+   } while (png_ptr->zstream.avail_in);
+
+   /* finish the compression */
+   do
+   {
+      /* tell zlib we are finished */
+      ret = deflate(&png_ptr->zstream, Z_FINISH);
+
+      if (ret == Z_OK)
+      {
+         /* check to see if we need more room */
+         if (!(png_ptr->zstream.avail_out))
+         {
+            /* check to make sure our output array has room */
+            if (comp->num_output_ptr >= comp->max_output_ptr)
+            {
+               int old_max;
+
+               old_max = comp->max_output_ptr;
+               comp->max_output_ptr = comp->num_output_ptr + 4;
+               if (comp->output_ptr != NULL)
+               {
+                  png_charpp old_ptr;
+
+                  old_ptr = comp->output_ptr;
+                  /* This could be optimized to realloc() */
+                  comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                     (png_uint_32)(comp->max_output_ptr *
+                     png_sizeof (png_charpp)));
+                  png_memcpy(comp->output_ptr, old_ptr,
+                     old_max * png_sizeof (png_charp));
+                  png_free(png_ptr, old_ptr);
+               }
+               else
+                  comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                     (png_uint_32)(comp->max_output_ptr *
+                     png_sizeof (png_charp)));
+            }
+
+            /* save off the data */
+            comp->output_ptr[comp->num_output_ptr] =
+               (png_charp)png_malloc(png_ptr, (png_uint_32)png_ptr->zbuf_size);
+            png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
+               png_ptr->zbuf_size);
+            comp->num_output_ptr++;
+
+            /* and reset the buffer pointers */
+            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            png_ptr->zstream.next_out = png_ptr->zbuf;
+         }
+      }
+      else if (ret != Z_STREAM_END)
+      {
+         /* we got an error */
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+   } while (ret != Z_STREAM_END);
+
+   /* text length is number of buffers plus last buffer */
+   text_len = png_ptr->zbuf_size * comp->num_output_ptr;
+   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
+      text_len += png_ptr->zbuf_size - (png_size_t)png_ptr->zstream.avail_out;
+
+   return((int)text_len);
+}
+
+/* Write what png_text_compress() left in *comp as chunk data: either the
+ * untouched input text, or the saved buffers plus the tail in zbuf. */
+static void /* PRIVATE */
+png_write_compressed_data_out(png_structp png_ptr, compression_state *comp)
+{
+   int buf_index;
+
+   if (comp->input)
+   {
+       png_write_chunk_data(png_ptr, (png_bytep)comp->input,
+                            (png_size_t)comp->input_len);
+       return;
+   }
+
+   /* each saved buffer holds exactly zbuf_size bytes; release as we go */
+   for (buf_index = 0; buf_index < comp->num_output_ptr; buf_index++)
+   {
+      png_charp saved = comp->output_ptr[buf_index];
+      png_write_chunk_data(png_ptr, (png_bytep)saved, png_ptr->zbuf_size);
+      png_free(png_ptr, saved);
+      comp->output_ptr[buf_index] = NULL;
+   }
+   if (comp->max_output_ptr != 0)
+      png_free(png_ptr, comp->output_ptr);
+   comp->output_ptr = NULL;
+   /* whatever is still sitting in zbuf is the tail of the output */
+   if (png_ptr->zstream.avail_out < (png_uint_32)png_ptr->zbuf_size)
+      png_write_chunk_data(png_ptr, png_ptr->zbuf,
+         png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+
+   /* reset zlib for another zTXt/iTXt or image data */
+   deflateReset(&png_ptr->zstream);
+   png_ptr->zstream.data_type = Z_BINARY;
+}
+#endif
+
+/* Write the IHDR chunk.  The header fields are validated first: a bad
+ * color type or bit depth is fatal (png_error), while a bad compression,
+ * filter or interlace value is warned about and replaced.  The values are
+ * then stored in png_ptr (the rest of the code depends on them), the
+ * chunk is written, and the zlib deflate stream is initialized.
+ */
+void /* PRIVATE */
+png_write_IHDR(png_structp png_ptr, png_uint_32 width, png_uint_32 height,
+   int bit_depth, int color_type, int compression_type, int filter_type,
+   int interlace_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IHDR;
+#endif
+   int ret;
+
+   png_byte buf[13]; /* buffer to store the IHDR info */
+
+   png_debug(1, "in png_write_IHDR\n");
+   /* Check that we have valid input data from the application info */
+   switch (color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8:
+            case 16: png_ptr->channels = 1; break;
+            default: png_error(png_ptr,"Invalid bit depth for grayscale image");
+         }
+         break;
+      case PNG_COLOR_TYPE_RGB:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for RGB image");
+         png_ptr->channels = 3;
+         break;
+      case PNG_COLOR_TYPE_PALETTE:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8: png_ptr->channels = 1; break;
+            default: png_error(png_ptr, "Invalid bit depth for paletted image");
+         }
+         break;
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for grayscale+alpha image");
+         png_ptr->channels = 2;
+         break;
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for RGBA image");
+         png_ptr->channels = 4;
+         break;
+      default:
+         png_error(png_ptr, "Invalid image color type specified");
+   }
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid compression type specified");
+      compression_type = PNG_COMPRESSION_TYPE_BASE;
+   }
+
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. png_permit_mng_features was called with PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if (
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+      !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) &&
+      (color_type == PNG_COLOR_TYPE_RGB ||
+       color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
+      (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) &&
+#endif
+      filter_type != PNG_FILTER_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid filter type specified");
+      filter_type = PNG_FILTER_TYPE_BASE;
+   }
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   if (interlace_type != PNG_INTERLACE_NONE &&
+      interlace_type != PNG_INTERLACE_ADAM7)
+   {
+      png_warning(png_ptr, "Invalid interlace type specified");
+      interlace_type = PNG_INTERLACE_ADAM7;
+   }
+#else
+   interlace_type=PNG_INTERLACE_NONE;
+#endif
+
+   /* save off the relevant information */
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->color_type = (png_byte)color_type;
+   png_ptr->interlaced = (png_byte)interlace_type;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->compression_type = (png_byte)compression_type;
+   png_ptr->width = width;
+   png_ptr->height = height;
+
+   png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width);
+   /* set the usr info, so any transformations can modify it */
+   png_ptr->usr_width = png_ptr->width;
+   png_ptr->usr_bit_depth = png_ptr->bit_depth;
+   png_ptr->usr_channels = png_ptr->channels;
+
+   /* pack the header information into the buffer */
+   png_save_uint_32(buf, width);
+   png_save_uint_32(buf + 4, height);
+   buf[8] = (png_byte)bit_depth;
+   buf[9] = (png_byte)color_type;
+   buf[10] = (png_byte)compression_type;
+   buf[11] = (png_byte)filter_type;
+   buf[12] = (png_byte)interlace_type;
+
+   /* write the chunk */
+   png_write_chunk(png_ptr, png_IHDR, buf, (png_size_t)13);
+
+   /* initialize zlib with PNG info */
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+   if (!(png_ptr->do_filter))
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
+         png_ptr->bit_depth < 8)
+         png_ptr->do_filter = PNG_FILTER_NONE;
+      else
+         png_ptr->do_filter = PNG_ALL_FILTERS;
+   }
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY))
+   {
+      if (png_ptr->do_filter != PNG_FILTER_NONE)
+         png_ptr->zlib_strategy = Z_FILTERED;
+      else
+         png_ptr->zlib_strategy = Z_DEFAULT_STRATEGY;
+   }
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_LEVEL))
+      png_ptr->zlib_level = Z_DEFAULT_COMPRESSION;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL))
+      png_ptr->zlib_mem_level = 8;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS))
+      png_ptr->zlib_window_bits = 15;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_METHOD))
+      png_ptr->zlib_method = 8;
+   ret = deflateInit2(&png_ptr->zstream, png_ptr->zlib_level,
+         png_ptr->zlib_method, png_ptr->zlib_window_bits,
+         png_ptr->zlib_mem_level, png_ptr->zlib_strategy);
+   if (ret != Z_OK)
+   {
+      if (ret == Z_VERSION_ERROR) png_error(png_ptr,
+          "zlib failed to initialize compressor -- version error");
+      if (ret == Z_STREAM_ERROR) png_error(png_ptr,
+           "zlib failed to initialize compressor -- stream error");
+      if (ret == Z_MEM_ERROR) png_error(png_ptr,
+           "zlib failed to initialize compressor -- mem error");
+      png_error(png_ptr, "zlib failed to initialize compressor");
+   }
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   /* libpng ignores zstream.data_type; preset it so zlib need not evaluate it */
+   png_ptr->zstream.data_type = Z_BINARY;
+
+   png_ptr->mode = PNG_HAVE_IHDR;
+}
+
+/* Write the PLTE chunk holding num_pal entries taken from palette.  Each
+ * entry is copied field by field (red, green, blue) rather than dumped
+ * as raw memory, so png_color may be redefined to any convenient layout.
+ */
+void /* PRIVATE */
+png_write_PLTE(png_structp png_ptr, png_colorp palette, png_uint_32 num_pal)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_PLTE;
+#endif
+   png_uint_32 entry;
+   png_colorp color;
+   png_byte rgb[3];
+
+   png_debug(1, "in png_write_PLTE\n");
+   /* more than 256 entries is never accepted; zero entries only when the
+    * PNG_FLAG_MNG_EMPTY_PLTE feature has been permitted */
+   if ((
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+        !(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) &&
+#endif
+        num_pal == 0) || num_pal > 256)
+   {
+     /* an error for palette images, only a skipped chunk for the others */
+     if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+     {
+        png_warning(png_ptr, "Invalid number of colors in palette");
+        return;
+     }
+     png_error(png_ptr, "Invalid number of colors in palette");
+   }
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
+   {
+      png_warning(png_ptr,
+        "Ignoring request to write a PLTE chunk in grayscale PNG");
+      return;
+   }
+
+   png_ptr->num_palette = (png_uint_16)num_pal;
+   png_debug1(3, "num_palette = %d\n", png_ptr->num_palette);
+
+   png_write_chunk_start(png_ptr, png_PLTE, num_pal * 3);
+#ifndef PNG_NO_POINTER_INDEXING
+   for (entry = 0, color = palette; entry < num_pal; entry++, color++)
+   {
+      rgb[0] = color->red;
+      rgb[1] = color->green;
+      rgb[2] = color->blue;
+      png_write_chunk_data(png_ptr, rgb, (png_size_t)3);
+   }
+#else
+   /* This is a little slower but some buggy compilers need to do this instead */
+   color = palette;
+   for (entry = 0; entry < num_pal; entry++)
+   {
+      rgb[0] = color[entry].red;
+      rgb[1] = color[entry].green;
+      rgb[2] = color[entry].blue;
+      png_write_chunk_data(png_ptr, rgb, (png_size_t)3);
+   }
+#endif
+   png_write_chunk_end(png_ptr);
+   png_ptr->mode |= PNG_HAVE_PLTE;
+}
+
+/* Write data[0..length-1] as one IDAT chunk.  Before the first IDAT the
+ * zlib CMF/FLG bytes in data[0..1] may be patched to a smaller window. */
+void /* PRIVATE */
+png_write_IDAT(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IDAT;
+#endif
+   png_debug(1, "in png_write_IDAT\n");
+
+   /* Optimize the CMF field in the zlib stream. */
+   /* This hack of the zlib stream is compliant to the stream specification. */
+   if (!(png_ptr->mode & PNG_HAVE_IDAT) &&
+       png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
+   {
+      unsigned int z_cmf = data[0];  /* zlib compression method and flags */
+      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
+      {
+         /* Avoid memory underflows and multiplication overflows.  These
+            conditions are practically always met but must be checked. */
+         if (length >= 2 &&
+             png_ptr->height < 16384 && png_ptr->width < 16384)
+         {
+            png_uint_32 uncompressed_idat_size = png_ptr->height *
+               ((png_ptr->width *
+               png_ptr->channels * png_ptr->bit_depth + 15) >> 3);
+            unsigned int z_cinfo = z_cmf >> 4; /* high nibble of CMF */
+            unsigned int half_z_window_size = 1 << (z_cinfo + 7);
+            while (uncompressed_idat_size <= half_z_window_size &&
+                   half_z_window_size >= 256)
+            {
+               z_cinfo--;
+               half_z_window_size >>= 1;
+            }
+            z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
+            if (data[0] != (png_byte)z_cmf)
+            {
+               data[0] = (png_byte)z_cmf;
+               data[1] &= 0xe0; /* clear low 5 bits; refilled below (mod 31) */
+               data[1] += (png_byte)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);
+            }
+         }
+      }
+      else
+         png_error(png_ptr,
+            "Invalid zlib compression method or flags in IDAT");
+   }
+
+   png_write_chunk(png_ptr, png_IDAT, data, length);
+   png_ptr->mode |= PNG_HAVE_IDAT;
+}
+
+/* Write the (empty) IEND chunk and note in mode that it has been written */
+void /* PRIVATE */
+png_write_IEND(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IEND;
+#endif
+   png_debug(1, "in png_write_IEND\n");
+   /* IEND carries no payload: NULL data pointer, zero length */
+   png_write_chunk(png_ptr, png_IEND, png_bytep_NULL, (png_size_t)0);
+   png_ptr->mode |= PNG_HAVE_IEND;
+}
+
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+/* write a gAMA chunk from a floating point gamma value */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void /* PRIVATE */
+png_write_gAMA(png_structp png_ptr, double file_gamma)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_gAMA;
+#endif
+   png_byte gamma_bytes[4];
+
+   png_debug(1, "in png_write_gAMA\n");
+   /* The chunk stores gamma as an integer count of 1/100,000ths; adding
+    * 0.5 before the truncating cast rounds non-negative values. */
+   png_save_uint_32(gamma_bytes,
+      (png_uint_32)(file_gamma * 100000.0 + 0.5));
+   png_write_chunk(png_ptr, png_gAMA, gamma_bytes, (png_size_t)4);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void /* PRIVATE */
+png_write_gAMA_fixed(png_structp png_ptr, png_fixed_point file_gamma)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_gAMA;
+#endif
+   png_byte gamma_bytes[4];
+   png_uint_32 igamma = (png_uint_32)file_gamma; /* already 1/100,000ths */
+
+   png_debug(1, "in png_write_gAMA\n");
+   png_save_uint_32(gamma_bytes, igamma);
+   png_write_chunk(png_ptr, png_gAMA, gamma_bytes, (png_size_t)4);
+}
+#endif
+#endif
+
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+/* write a sRGB chunk: a single byte holding the rendering intent */
+void /* PRIVATE */
+png_write_sRGB(png_structp png_ptr, int srgb_intent)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sRGB;
+#endif
+   png_byte intent_byte;
+
+   png_debug(1, "in png_write_sRGB\n");
+   /* an out-of-range intent only draws a warning; it is written anyway */
+   if (!(srgb_intent < PNG_sRGB_INTENT_LAST))
+      png_warning(png_ptr, "Invalid sRGB rendering intent specified");
+   intent_byte = (png_byte)srgb_intent;
+   png_write_chunk(png_ptr, png_sRGB, &intent_byte, (png_size_t)1);
+}
+#endif
+
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+/* Write an iCCP chunk: keyword, compression byte, compressed profile.  The
+ * first 4 bytes of profile are read as its big-endian length: a shorter
+ * profile is rejected, a longer one truncated.  Problems only warn. */
+void /* PRIVATE */
+png_write_iCCP(png_structp png_ptr, png_charp name, int compression_type,
+   png_charp profile, int profile_len)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_iCCP;
+#endif
+   png_size_t name_len;
+   png_charp new_name;
+   compression_state comp;
+   png_uint_32 embedded_profile_len = 0;
+
+   png_debug(1, "in png_write_iCCP\n");
+   comp.num_output_ptr = comp.max_output_ptr = comp.input_len = 0;
+   comp.output_ptr = NULL;
+   comp.input = NULL;
+
+   if (name == NULL || (name_len = png_check_keyword(png_ptr, name,
+      &new_name)) == 0)
+   {
+      png_warning(png_ptr, "Empty keyword in iCCP chunk");
+      return;
+   }
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+      png_warning(png_ptr, "Unknown compression type in iCCP chunk");
+
+   if (profile == NULL)
+      profile_len = 0;
+   /* unsigned math: a leading byte >= 0x80 must not yield a negative length */
+   if (profile_len > 3)
+      embedded_profile_len =
+          ((png_uint_32)(*( (png_bytep)profile  ))<<24) |
+          ((png_uint_32)(*( (png_bytep)profile+1))<<16) |
+          ((png_uint_32)(*( (png_bytep)profile+2))<< 8) |
+          ((png_uint_32)(*( (png_bytep)profile+3))    );
+
+   if (profile_len < 0 || (png_uint_32)profile_len < embedded_profile_len)
+     {
+        png_warning(png_ptr,
+          "Embedded profile length too large in iCCP chunk");
+        png_free(png_ptr, new_name); /* keyword copy must not leak here */
+        return;
+     }
+
+   if ((png_uint_32)profile_len > embedded_profile_len)
+     {
+        png_warning(png_ptr,
+          "Truncating profile to actual length in iCCP chunk");
+        profile_len = (int)embedded_profile_len;
+     }
+
+   if (profile_len)
+       profile_len = png_text_compress(png_ptr, profile, (png_size_t)profile_len,
+          PNG_COMPRESSION_TYPE_BASE, &comp);
+
+   /* make sure we include the NULL after the name and the compression type */
+   png_write_chunk_start(png_ptr, png_iCCP,
+          (png_uint_32)name_len+profile_len+2);
+   new_name[name_len+1]=0x00;
+   png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 2);
+
+   if (profile_len)
+      png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_name);
+}
+#endif
+
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+/* Write a sPLT (suggested palette) chunk: the checked keyword plus its
+ * terminating NUL, one sample-depth byte, then every entry as 6 bytes
+ * (depth 8) or 10 bytes (otherwise).  An empty keyword only warns. */
+void /* PRIVATE */
+png_write_sPLT(png_structp png_ptr, png_sPLT_tp spalette)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sPLT;
+#endif
+   png_size_t name_len;
+   png_charp new_name;
+   png_byte entrybuf[10];
+   int entry_size = (spalette->depth == 8 ? 6 : 10);
+   int palette_size = entry_size * spalette->nentries;
+   png_sPLT_entryp ep;
+#ifdef PNG_NO_POINTER_INDEXING
+   int i;
+#endif
+
+   png_debug(1, "in png_write_sPLT\n");
+   if (spalette->name == NULL || (name_len = png_check_keyword(png_ptr,
+      spalette->name, &new_name))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in sPLT chunk");
+      return;
+   }
+
+   png_write_chunk_start(png_ptr, png_sPLT,
+          (png_uint_32)(name_len + 2 + palette_size));
+   png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 1);
+   png_write_chunk_data(png_ptr, (png_bytep)&spalette->depth, 1);
+
+#ifndef PNG_NO_POINTER_INDEXING
+   for (ep = spalette->entries; ep<spalette->entries+spalette->nentries; ep++)
+   {
+       if (spalette->depth == 8)
+       {
+           entrybuf[0] = (png_byte)ep->red;
+           entrybuf[1] = (png_byte)ep->green;
+           entrybuf[2] = (png_byte)ep->blue;
+           entrybuf[3] = (png_byte)ep->alpha;
+           png_save_uint_16(entrybuf + 4, ep->frequency);
+       }
+       else
+       {
+           png_save_uint_16(entrybuf + 0, ep->red);
+           png_save_uint_16(entrybuf + 2, ep->green);
+           png_save_uint_16(entrybuf + 4, ep->blue);
+           png_save_uint_16(entrybuf + 6, ep->alpha);
+           png_save_uint_16(entrybuf + 8, ep->frequency);
+       }
+       png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
+   }
+#else
+   ep=spalette->entries;
+   for (i=0; i<spalette->nentries; i++) /* was "i>", which wrote no entries */
+   {
+       if (spalette->depth == 8)
+       {
+           entrybuf[0] = (png_byte)ep[i].red;
+           entrybuf[1] = (png_byte)ep[i].green;
+           entrybuf[2] = (png_byte)ep[i].blue;
+           entrybuf[3] = (png_byte)ep[i].alpha;
+           png_save_uint_16(entrybuf + 4, ep[i].frequency);
+       }
+       else
+       {
+           png_save_uint_16(entrybuf + 0, ep[i].red);
+           png_save_uint_16(entrybuf + 2, ep[i].green);
+           png_save_uint_16(entrybuf + 4, ep[i].blue);
+           png_save_uint_16(entrybuf + 6, ep[i].alpha);
+           png_save_uint_16(entrybuf + 8, ep[i].frequency);
+       }
+       png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
+   }
+#endif
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_name);
+}
+#endif
+
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+/* Write the sBIT chunk from *sbit (RGB or gray, plus alpha if present).
+ * A depth of zero or above the permitted maximum warns and writes nothing. */
+void /* PRIVATE */
+png_write_sBIT(png_structp png_ptr, png_color_8p sbit, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sBIT;
+#endif
+   png_byte depths[4];
+   png_size_t count = 0;
+
+   png_debug(1, "in png_write_sBIT\n");
+   /* fields are copied one by one; the order of PNG_COLOR_8 is not assumed */
+   if ((color_type & PNG_COLOR_MASK_COLOR) == 0)
+   {
+      if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[count++] = sbit->gray;
+   }
+   else
+   {
+      /* palette images are checked against 8, others against usr_bit_depth */
+      png_byte limit = 8;
+
+      if (color_type != PNG_COLOR_TYPE_PALETTE)
+         limit = (png_byte)png_ptr->usr_bit_depth;
+      if (sbit->red == 0 || sbit->red > limit ||
+          sbit->green == 0 || sbit->green > limit ||
+          sbit->blue == 0 || sbit->blue > limit)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[count++] = sbit->red;
+      depths[count++] = sbit->green;
+      depths[count++] = sbit->blue;
+   }
+
+   if (color_type & PNG_COLOR_MASK_ALPHA)
+   {
+      if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[count++] = sbit->alpha;
+   }
+
+   png_write_chunk(png_ptr, png_sBIT, depths, count);
+}
+#endif
+
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+/* Write the cHRM chunk from floating point chromaticities.  The points
+ * are checked in the order white, red, green, blue: every coordinate must
+ * be non-negative and each x + y at most 1.0, and the white coordinates
+ * may not exceed 0.8.  The first invalid point produces a warning and the
+ * chunk is not written.  Values are stored in 1/100,000ths.
+ */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void /* PRIVATE */
+png_write_cHRM(png_structp png_ptr, double white_x, double white_y,
+   double red_x, double red_y, double green_x, double green_y,
+   double blue_x, double blue_y)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_cHRM;
+#endif
+   png_byte chrm[32];
+
+   png_debug(1, "in png_write_cHRM\n");
+
+   /* white point: bytes 0..7 */
+   if (white_x < 0 || white_x > 0.8 || white_y < 0 || white_y > 0.8 ||
+       white_x + white_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM white point specified");
+#if !defined(PNG_NO_CONSOLE_IO)
+      fprintf(stderr,"white_x=%f, white_y=%f\n",white_x, white_y);
+#endif
+      return;
+   }
+   png_save_uint_32(chrm, (png_uint_32)(white_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 4, (png_uint_32)(white_y * 100000.0 + 0.5));
+
+   /* red point: bytes 8..15 */
+   if (red_x < 0 ||  red_y < 0 || red_x + red_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM red point specified");
+      return;
+   }
+   png_save_uint_32(chrm + 8, (png_uint_32)(red_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 12, (png_uint_32)(red_y * 100000.0 + 0.5));
+
+   /* green point: bytes 16..23 */
+   if (green_x < 0 || green_y < 0 || green_x + green_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM green point specified");
+      return;
+   }
+   png_save_uint_32(chrm + 16, (png_uint_32)(green_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 20, (png_uint_32)(green_y * 100000.0 + 0.5));
+
+   /* blue point: bytes 24..31 */
+   if (blue_x < 0 || blue_y < 0 || blue_x + blue_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM blue point specified");
+      return;
+   }
+   png_save_uint_32(chrm + 24, (png_uint_32)(blue_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 28, (png_uint_32)(blue_y * 100000.0 + 0.5));
+
+   png_write_chunk(png_ptr, png_cHRM, chrm, (png_size_t)32);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Write the cHRM chunk from fixed point chromaticities, which are already
+ * expressed in 1/100,000ths and are stored unchanged.  The white point and
+ * then the red, green and blue primaries are range-checked in that order;
+ * the first failure produces a warning and the chunk is dropped.
+ */
+void /* PRIVATE */
+png_write_cHRM_fixed(png_structp png_ptr, png_fixed_point white_x,
+   png_fixed_point white_y, png_fixed_point red_x, png_fixed_point red_y,
+   png_fixed_point green_x, png_fixed_point green_y, png_fixed_point blue_x,
+   png_fixed_point blue_y)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_cHRM;
+#endif
+   png_byte chunk_data[32];
+
+   png_debug(1, "in png_write_cHRM\n");
+
+   /* validate everything first; nothing is emitted unless all points pass */
+   if (white_x > 80000L || white_y > 80000L || white_x + white_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM white point specified");
+#if !defined(PNG_NO_CONSOLE_IO)
+      fprintf(stderr,"white_x=%ld, white_y=%ld\n",white_x, white_y);
+#endif
+      return;
+   }
+
+   if (red_x + red_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM fixed red point specified");
+      return;
+   }
+
+   if (green_x + green_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM green point specified");
+      return;
+   }
+
+   if (blue_x + blue_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM blue point specified");
+      return;
+   }
+
+   /* eight big-endian 32-bit values: white, red, green, blue (x then y) */
+   png_save_uint_32(chunk_data, (png_uint_32)white_x);
+   png_save_uint_32(chunk_data + 4, (png_uint_32)white_y);
+   png_save_uint_32(chunk_data + 8, (png_uint_32)red_x);
+   png_save_uint_32(chunk_data + 12, (png_uint_32)red_y);
+   png_save_uint_32(chunk_data + 16, (png_uint_32)green_x);
+   png_save_uint_32(chunk_data + 20, (png_uint_32)green_y);
+   png_save_uint_32(chunk_data + 24, (png_uint_32)blue_x);
+   png_save_uint_32(chunk_data + 28, (png_uint_32)blue_y);
+
+   png_write_chunk(png_ptr, png_cHRM, chunk_data, (png_size_t)32);
+}
+#endif
+#endif
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+/* Write the tRNS (transparency) chunk.
+ *
+ * The payload depends on color_type:
+ *   palette - the first num_trans bytes of trans, written as they are;
+ *   gray    - tran->gray as one 16-bit value;
+ *   RGB     - tran->red, tran->green and tran->blue as 16-bit values.
+ * Any other color type only produces a warning.  Out-of-range input is
+ * reported with a warning and the chunk is not written.
+ */
+void /* PRIVATE */
+png_write_tRNS(png_structp png_ptr, png_bytep trans, png_color_16p tran,
+   int num_trans, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tRNS;
+#endif
+   png_byte chunk_data[6];
+
+   png_debug(1, "in png_write_tRNS\n");
+
+   switch (color_type)
+   {
+      case PNG_COLOR_TYPE_PALETTE:
+         if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette)
+         {
+            png_warning(png_ptr,
+               "Invalid number of transparent colors specified");
+            return;
+         }
+         /* the caller's array already has the on-disk layout */
+         png_write_chunk(png_ptr, png_tRNS, trans, (png_size_t)num_trans);
+         break;
+
+      case PNG_COLOR_TYPE_GRAY:
+         /* the single sample must fit in bit_depth bits */
+         if (tran->gray >= (1 << png_ptr->bit_depth))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write tRNS chunk out-of-range for bit_depth");
+            return;
+         }
+         png_save_uint_16(chunk_data, tran->gray);
+         png_write_chunk(png_ptr, png_tRNS, chunk_data, (png_size_t)2);
+         break;
+
+      case PNG_COLOR_TYPE_RGB:
+         png_save_uint_16(chunk_data, tran->red);
+         png_save_uint_16(chunk_data + 2, tran->green);
+         png_save_uint_16(chunk_data + 4, tran->blue);
+         /* bytes 0, 2 and 4 are the high bytes; 8-bit data needs them 0 */
+         if (png_ptr->bit_depth == 8 &&
+             (chunk_data[0] | chunk_data[2] | chunk_data[4]))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8");
+            return;
+         }
+         png_write_chunk(png_ptr, png_tRNS, chunk_data, (png_size_t)6);
+         break;
+
+      default:
+         png_warning(png_ptr, "Can't write tRNS with an alpha channel");
+         break;
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+/* Write the bKGD (background) chunk.
+ *
+ * The payload depends on color_type:
+ *   palette - one byte, back->index, which must name an existing palette
+ *             entry (0 .. num_palette-1).  When MNG features are compiled
+ *             in, the range test is skipped for an empty palette if
+ *             PNG_FLAG_MNG_EMPTY_PLTE is permitted;
+ *   color   - back->red, back->green and back->blue as 16-bit values;
+ *   gray    - back->gray as one 16-bit value.
+ * Out-of-range input is reported with a warning and nothing is written.
+ */
+void /* PRIVATE */
+png_write_bKGD(png_structp png_ptr, png_color_16p back, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_bKGD;
+#endif
+   png_byte buf[6];
+
+   png_debug(1, "in png_write_bKGD\n");
+   if (color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      /* valid indices stop at num_palette-1, so reject index == num_palette
+       * as well as anything larger
+       */
+      if (
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+          (png_ptr->num_palette ||
+          (!(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE))) &&
+#endif
+         back->index >= png_ptr->num_palette)
+      {
+         png_warning(png_ptr, "Invalid background palette index");
+         return;
+      }
+      buf[0] = back->index;
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)1);
+   }
+   else if (color_type & PNG_COLOR_MASK_COLOR)
+   {
+      png_save_uint_16(buf, back->red);
+      png_save_uint_16(buf + 2, back->green);
+      png_save_uint_16(buf + 4, back->blue);
+      /* buf[0], buf[2] and buf[4] are the high bytes of the three samples;
+       * with 8-bit data they must all be zero
+       */
+      if(png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write 16-bit bKGD chunk when bit_depth is 8");
+            return;
+         }
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)6);
+   }
+   else
+   {
+      /* the gray sample must fit in bit_depth bits */
+      if(back->gray >= (1 << png_ptr->bit_depth))
+      {
+         png_warning(png_ptr,
+           "Ignoring attempt to write bKGD chunk out-of-range for bit_depth");
+         return;
+      }
+      png_save_uint_16(buf, back->gray);
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)2);
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+/* Write the hIST (palette histogram) chunk: num_hist entries from hist,
+ * each stored as a 16-bit value.  A count larger than the current palette
+ * size is rejected with a warning and nothing is written.
+ */
+void /* PRIVATE */
+png_write_hIST(png_structp png_ptr, png_uint_16p hist, int num_hist)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_hIST;
+#endif
+   int entry;
+   png_byte encoded[3];
+
+   png_debug(1, "in png_write_hIST\n");
+
+   if (num_hist > (int)png_ptr->num_palette)
+   {
+      png_debug2(3, "num_hist = %d, num_palette = %d\n", num_hist,
+         png_ptr->num_palette);
+      png_warning(png_ptr, "Invalid number of histogram entries specified");
+      return;
+   }
+
+   /* two bytes per entry, streamed one entry at a time */
+   png_write_chunk_start(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
+   entry = 0;
+   while (entry < num_hist)
+   {
+      png_save_uint_16(encoded, hist[entry]);
+      png_write_chunk_data(png_ptr, encoded, (png_size_t)2);
+      entry++;
+   }
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
+    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification,
+ * and if invalid, correct the keyword rather than discarding the entire
+ * chunk.  The PNG 1.0 specification requires keywords 1-79 characters in
+ * length, forbids leading or trailing whitespace, multiple internal spaces,
+ * and the non-break space (0xA0) from ISO 8859-1.
+ *
+ * Returns the length of the corrected keyword, or 0 if no usable keyword
+ * could be produced (NULL or empty input, allocation failure, or a keyword
+ * that is empty once the spaces have been stripped).
+ *
+ * The new_key is allocated to hold the corrected keyword and must be freed
+ * by the calling routine.  This avoids problems with trying to write to
+ * static keywords without having to have duplicate copies of the strings.
+ * Whenever 0 is returned *new_key is NULL and there is nothing to free.
+ */
+png_size_t /* PRIVATE */
+png_check_keyword(png_structp png_ptr, png_charp key, png_charpp new_key)
+{
+   png_size_t key_len;
+   png_charp kp, dp;
+   int kflag;
+   int kwarn=0;
+
+   png_debug(1, "in png_check_keyword\n");
+   *new_key = NULL;
+
+   if (key == NULL || (key_len = png_strlen(key)) == 0)
+   {
+      png_warning(png_ptr, "zero length keyword");
+      return ((png_size_t)0);
+   }
+
+   png_debug1(2, "Keyword to be checked is '%s'\n", key);
+
+   *new_key = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)(key_len + 2));
+   if (*new_key == NULL)
+   {
+      png_warning(png_ptr, "Out of memory while procesing keyword");
+      return ((png_size_t)0);
+   }
+
+   /* Replace non-printing characters with a blank and print a warning.
+    * Control characters and the range 0x7F-0xA0 are rejected.
+    */
+   for (kp = key, dp = *new_key; *kp != '\0'; kp++, dp++)
+   {
+      if ((png_byte)*kp < 0x20 ||
+         ((png_byte)*kp > 0x7E && (png_byte)*kp < 0xA1))
+      {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+         char msg[40];
+
+         png_snprintf(msg, 40,
+           "invalid keyword character 0x%02X", (png_byte)*kp);
+         png_warning(png_ptr, msg);
+#else
+         png_warning(png_ptr, "invalid character in keyword");
+#endif
+         *dp = ' ';
+      }
+      else
+      {
+         *dp = *kp;
+      }
+   }
+   *dp = '\0';
+
+   /* Remove any trailing white space. */
+   kp = *new_key + key_len - 1;
+   if (*kp == ' ')
+   {
+      png_warning(png_ptr, "trailing spaces removed from keyword");
+
+      /* Index from the front and stop at zero so that a keyword made up
+       * entirely of blanks cannot walk off the start of the buffer.
+       */
+      while (key_len > 0 && (*new_key)[key_len - 1] == ' ')
+      {
+        (*new_key)[--key_len] = '\0';
+      }
+   }
+
+   /* Remove any leading white space. */
+   kp = *new_key;
+   if (*kp == ' ')
+   {
+      png_warning(png_ptr, "leading spaces removed from keyword");
+
+      while (*kp == ' ')
+      {
+        kp++;
+        key_len--;
+      }
+   }
+
+   png_debug1(2, "Checking for multiple internal spaces in '%s'\n", kp);
+
+   /* Remove multiple internal spaces.  kp reads from the first non-blank
+    * character while dp rewrites the buffer from its start, so this pass
+    * also shifts the keyword left over any leading blanks skipped above.
+    */
+   for (kflag = 0, dp = *new_key; *kp != '\0'; kp++)
+   {
+      if (*kp == ' ' && kflag == 0)
+      {
+         *(dp++) = *kp;
+         kflag = 1;
+      }
+      else if (*kp == ' ')
+      {
+         key_len--;
+         kwarn=1;
+      }
+      else
+      {
+         *(dp++) = *kp;
+         kflag = 0;
+      }
+   }
+   *dp = '\0';
+   if(kwarn)
+      png_warning(png_ptr, "extra interior spaces removed from keyword");
+
+   if (key_len == 0)
+   {
+      png_free(png_ptr, *new_key);
+      *new_key=NULL;
+      png_warning(png_ptr, "Zero length keyword");
+      return ((png_size_t)0);
+   }
+
+   if (key_len > 79)
+   {
+      png_warning(png_ptr, "keyword length must be 1 - 79 characters");
+      /* new_key is a pointer to the string pointer: dereference it first so
+       * the terminator lands inside the keyword buffer.
+       */
+      (*new_key)[79] = '\0';
+      key_len = 79;
+   }
+
+   return (key_len);
+}
+#endif
+
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+/* Write a tEXt chunk: the corrected keyword, a NUL separator, then the
+ * text itself (without a terminator).
+ *
+ * The keyword is run through png_check_keyword(); if no usable keyword
+ * results, a warning is issued and nothing is written.  The text_len
+ * argument is not consulted: the length is always recomputed from text,
+ * with NULL or empty text giving a chunk that holds the keyword only.
+ */
+void /* PRIVATE */
+png_write_tEXt(png_structp png_ptr, png_charp key, png_charp text,
+   png_size_t text_len)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tEXt;
+#endif
+   png_size_t keyword_len;
+   png_charp keyword;
+
+   png_debug(1, "in png_write_tEXt\n");
+
+   if (key == NULL)
+      keyword_len = 0;
+   else
+      keyword_len = png_check_keyword(png_ptr, key, &keyword);
+
+   if (keyword_len == 0)
+   {
+      png_warning(png_ptr, "Empty keyword in tEXt chunk");
+      return;
+   }
+
+   text_len = (text != NULL && *text != '\0') ? png_strlen(text) : 0;
+
+   /* the extra byte is the NUL that follows the keyword */
+   png_write_chunk_start(png_ptr, png_tEXt,
+      (png_uint_32)keyword_len+text_len+1);
+
+   /*
+    * The application is responsible for meeting the PNG-1.0 requirements on
+    * the contents of the text.  PNG-1.0 through PNG-1.2 discourage the use
+    * of any non-Latin-1 characters except for NEWLINE; ISO PNG will forbid
+    * them.  The NUL character is forbidden by PNG-1.0 through PNG-1.2 and
+    * ISO PNG.
+    */
+   png_write_chunk_data(png_ptr, (png_bytep)keyword, keyword_len + 1);
+   if (text_len != 0)
+      png_write_chunk_data(png_ptr, (png_bytep)text, text_len);
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, keyword);
+}
+#endif
+
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+/* Write a compressed text (zTXt) chunk: the corrected keyword, a NUL
+ * separator, one byte holding the compression value, then the compressed
+ * text.
+ *
+ * The keyword is run through png_check_keyword(); if no usable keyword
+ * results, a warning is issued and nothing is written.  When text is NULL
+ * or empty, or compression is PNG_TEXT_COMPRESSION_NONE, the work is handed
+ * to png_write_tEXt() instead.  The text_len argument is not consulted: the
+ * length is recomputed from text before compressing.
+ */
+void /* PRIVATE */
+png_write_zTXt(png_structp png_ptr, png_charp key, png_charp text,
+   png_size_t text_len, int compression)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_zTXt;
+#endif
+   png_size_t key_len;
+   char buf[1];   /* holds the single compression byte */
+   png_charp new_key;   /* corrected keyword, freed before returning */
+   compression_state comp;
+
+   png_debug(1, "in png_write_zTXt\n");
+
+   comp.num_output_ptr = 0;   /* start from an empty compression state */
+   comp.max_output_ptr = 0;
+   comp.output_ptr = NULL;
+   comp.input = NULL;
+   comp.input_len = 0;
+
+   if (key == NULL || (key_len = png_check_keyword(png_ptr, key, &new_key))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in zTXt chunk");
+      return;
+   }
+
+   if (text == NULL || *text == '\0' || compression==PNG_TEXT_COMPRESSION_NONE)
+   {
+      png_write_tEXt(png_ptr, new_key, text, (png_size_t)0);   /* the already corrected keyword is checked again there */
+      png_free(png_ptr, new_key);
+      return;
+   }
+
+   text_len = png_strlen(text);
+
+   /* compute the compressed data; do it now for the length, which has to
+    * be known before the chunk header can be written */
+   text_len = png_text_compress(png_ptr, text, text_len, compression,
+       &comp);
+
+   /* write start of chunk; the 2 extra bytes are the NUL after the key and
+    * the compression byte */
+   png_write_chunk_start(png_ptr, png_zTXt, (png_uint_32)
+      (key_len+text_len+2));
+   /* write key, including its terminating NUL */
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+   png_free(png_ptr, new_key);
+
+   buf[0] = (png_byte)compression;
+   /* write compression */
+   png_write_chunk_data(png_ptr, (png_bytep)buf, (png_size_t)1);
+   /* write the compressed data */
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   /* close the chunk */
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+/* Write an iTXt (international text) chunk.
+ *
+ * Layout written: keyword + NUL, compression flag byte, compression method
+ * byte (always 0), language tag + NUL, translated keyword + NUL, then the
+ * text as produced by png_text_compress().
+ *
+ * key is run through png_check_keyword(); if no usable keyword results, a
+ * warning is issued and nothing is written.  lang is run through the same
+ * check; when it is NULL or unusable a warning is issued and an empty
+ * language tag is written.  lang_key and text may be NULL, in which case
+ * they are treated as empty.
+ */
+void /* PRIVATE */
+png_write_iTXt(png_structp png_ptr, int compression, png_charp key,
+    png_charp lang, png_charp lang_key, png_charp text)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_iTXt;
+#endif
+   png_size_t lang_len, key_len, lang_key_len, text_len;
+   png_charp new_lang, new_key;
+   png_byte cbuf[2];
+   compression_state comp;
+
+   png_debug(1, "in png_write_iTXt\n");
+
+   /* start from a fully initialised compression state, exactly as
+    * png_write_zTXt() does */
+   comp.num_output_ptr = 0;
+   comp.max_output_ptr = 0;
+   comp.output_ptr = NULL;
+   comp.input = NULL;
+   comp.input_len = 0;
+
+   if (key == NULL || (key_len = png_check_keyword(png_ptr, key, &new_key))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in iTXt chunk");
+      return;
+   }
+   if (lang == NULL || (lang_len = png_check_keyword(png_ptr, lang, &new_lang))==0)
+   {
+      png_warning(png_ptr, "Empty language field in iTXt chunk");
+      new_lang = NULL;
+      lang_len = 0;
+   }
+
+   if (lang_key == NULL)
+     lang_key_len = 0;
+   else
+     lang_key_len = png_strlen(lang_key);
+
+   if (text == NULL)
+      text_len = 0;
+   else
+     text_len = png_strlen(text);
+
+   /* compute the compressed data; do it now for the length.
+    * NOTE(review): compression-2 presumably maps the iTXt compression codes
+    * onto the tEXt/zTXt ones expected by png_text_compress - confirm
+    * against the PNG_ITXT_COMPRESSION_* definitions.
+    */
+   text_len = png_text_compress(png_ptr, text, text_len, compression-2,
+      &comp);
+
+
+   /* make sure we include the compression flag, the compression byte,
+    * and the NULs after the key, lang, and lang_key parts */
+
+   png_write_chunk_start(png_ptr, png_iTXt,
+          (png_uint_32)(
+        5 /* comp byte, comp flag, terminators for key, lang and lang_key */
+        + key_len
+        + lang_len
+        + lang_key_len
+        + text_len));
+
+   /*
+    * We leave it to the application to meet PNG-1.0 requirements on the
+    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
+    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
+    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
+    */
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+
+   /* set the compression flag */
+   if (compression == PNG_ITXT_COMPRESSION_NONE || \
+       compression == PNG_TEXT_COMPRESSION_NONE)
+       cbuf[0] = 0;
+   else /* compression == PNG_ITXT_COMPRESSION_zTXt */
+       cbuf[0] = 1;
+   /* set the compression method */
+   cbuf[1] = 0;
+   png_write_chunk_data(png_ptr, cbuf, 2);
+
+   /* cbuf[0] doubles as a one-byte empty string for any missing field */
+   cbuf[0] = 0;
+   png_write_chunk_data(png_ptr, (new_lang ? (png_bytep)new_lang : cbuf), lang_len + 1);
+   png_write_chunk_data(png_ptr, (lang_key ? (png_bytep)lang_key : cbuf), lang_key_len + 1);
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_key);
+   png_free(png_ptr, new_lang);
+}
+#endif
+
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+/* Write the oFFs (image offset) chunk: x_offset and y_offset as signed
+ * 32-bit values followed by one unit_type byte.  An unrecognized unit type
+ * only produces a warning; the chunk is written regardless.
+ */
+void /* PRIVATE */
+png_write_oFFs(png_structp png_ptr, png_int_32 x_offset, png_int_32 y_offset,
+   int unit_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_oFFs;
+#endif
+   png_byte chunk_data[9];
+
+   png_debug(1, "in png_write_oFFs\n");
+
+   if (!(unit_type < PNG_OFFSET_LAST))
+      png_warning(png_ptr, "Unrecognized unit type for oFFs chunk");
+
+   /* layout: bytes 0-3 x, bytes 4-7 y, byte 8 unit */
+   chunk_data[8] = (png_byte)unit_type;
+   png_save_int_32(chunk_data + 4, y_offset);
+   png_save_int_32(chunk_data, x_offset);
+
+   png_write_chunk(png_ptr, png_oFFs, chunk_data, (png_size_t)9);
+}
+#endif
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+/* Write the pCAL chunk (described in the PNG extensions document).
+ *
+ * Layout written: purpose keyword + NUL, X0 and X1 as signed 32-bit
+ * values, one byte each for type and nparams, the units string, then the
+ * nparams parameter strings.  Strings are separated by NULs; the last
+ * string in the chunk carries no terminator.
+ *
+ * purpose is run through png_check_keyword(); if no usable keyword results
+ * a warning is issued and nothing is written.  An unrecognized equation
+ * type only produces a warning.
+ */
+void /* PRIVATE */
+png_write_pCAL(png_structp png_ptr, png_charp purpose, png_int_32 X0,
+   png_int_32 X1, int type, int nparams, png_charp units, png_charpp params)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_pCAL;
+#endif
+   png_size_t purpose_len, units_len, total_len;
+   png_uint_32p params_len;
+   png_byte buf[10];
+   png_charp new_purpose;
+   int i;
+
+   png_debug1(1, "in png_write_pCAL (%d parameters)\n", nparams);
+   if (type >= PNG_EQUATION_LAST)
+      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk");
+
+   /* png_check_keyword() returns 0 and leaves new_purpose NULL when the
+    * keyword is unusable; do not build a chunk around a NULL keyword.
+    */
+   purpose_len = png_check_keyword(png_ptr, purpose, &new_purpose);
+   if (purpose_len == 0)
+   {
+      png_warning(png_ptr, "Empty purpose keyword in pCAL chunk");
+      return;
+   }
+   purpose_len++;   /* count the NUL that follows the keyword */
+
+   png_debug1(3, "pCAL purpose length = %d\n", (int)purpose_len);
+   /* the units string is NUL-terminated only if parameters follow it */
+   units_len = png_strlen(units) + (nparams == 0 ? 0 : 1);
+   png_debug1(3, "pCAL units length = %d\n", (int)units_len);
+   total_len = purpose_len + units_len + 10;
+
+   params_len = (png_uint_32p)png_malloc(png_ptr, (png_uint_32)(nparams
+      *png_sizeof(png_uint_32)));
+
+   /* Find the length of each parameter, making sure we don't count the
+      null terminator for the last parameter. */
+   for (i = 0; i < nparams; i++)
+   {
+      params_len[i] = png_strlen(params[i]) + (i == nparams - 1 ? 0 : 1);
+      png_debug2(3, "pCAL parameter %d length = %lu\n", i, params_len[i]);
+      total_len += (png_size_t)params_len[i];
+   }
+
+   png_debug1(3, "pCAL total length = %d\n", (int)total_len);
+   png_write_chunk_start(png_ptr, png_pCAL, (png_uint_32)total_len);
+   png_write_chunk_data(png_ptr, (png_bytep)new_purpose, purpose_len);
+   png_save_int_32(buf, X0);
+   png_save_int_32(buf + 4, X1);
+   buf[8] = (png_byte)type;
+   buf[9] = (png_byte)nparams;
+   png_write_chunk_data(png_ptr, buf, (png_size_t)10);
+   png_write_chunk_data(png_ptr, (png_bytep)units, (png_size_t)units_len);
+
+   png_free(png_ptr, new_purpose);
+
+   for (i = 0; i < nparams; i++)
+   {
+      png_write_chunk_data(png_ptr, (png_bytep)params[i],
+         (png_size_t)params_len[i]);
+   }
+
+   png_free(png_ptr, params_len);
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+/* Write the sCAL chunk from floating point dimensions.
+ *
+ * Layout written: one unit byte, width formatted with "%12.12e", a NUL
+ * separator, then height formatted the same way with no terminator.
+ * This variant is compiled only when floating point and stdio support
+ * are both available.
+ */
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+void /* PRIVATE */
+png_write_sCAL(png_structp png_ptr, int unit, double width, double height)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sCAL;
+#endif
+   char buf[64];   /* unit byte plus both formatted numbers */
+   png_size_t total_len;   /* bytes of buf that make up the chunk data */
+
+   png_debug(1, "in png_write_sCAL\n");
+
+   buf[0] = (char)unit;
+#if defined(_WIN32_WCE)
+/* sprintf() function is not supported on WindowsCE, so the numbers are
+ * formatted as wide strings and then converted into buf */
+   {
+      wchar_t wc_buf[32];
+      size_t wc_len;
+      swprintf(wc_buf, TEXT("%12.12e"), width);
+      wc_len = wcslen(wc_buf);
+      WideCharToMultiByte(CP_ACP, 0, wc_buf, -1, buf + 1, wc_len, NULL, NULL);
+      total_len = wc_len + 2;   /* unit byte + width + separator */
+      swprintf(wc_buf, TEXT("%12.12e"), height);
+      wc_len = wcslen(wc_buf);
+      WideCharToMultiByte(CP_ACP, 0, wc_buf, -1, buf + total_len, wc_len,
+         NULL, NULL);
+      total_len += wc_len;
+   }
+#else
+   png_snprintf(buf + 1, 63, "%12.12e", width);
+   total_len = 1 + png_strlen(buf + 1) + 1;   /* unit byte + width + its NUL */
+   png_snprintf(buf + total_len, 64-total_len, "%12.12e", height);
+   total_len += png_strlen(buf + total_len);   /* height's NUL is not counted */
+#endif
+
+   png_debug1(3, "sCAL total length = %u\n", (unsigned int)total_len);
+   png_write_chunk(png_ptr, png_sCAL, (png_bytep)buf, total_len);
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Write the sCAL chunk from dimensions that are already formatted as
+ * strings.  Layout written: one unit byte, the width string, a NUL
+ * separator, then the height string with no terminator.  If the result
+ * would not fit in the 64-byte staging buffer a warning is issued and
+ * nothing is written.
+ */
+void /* PRIVATE */
+png_write_sCAL_s(png_structp png_ptr, int unit, png_charp width,
+   png_charp height)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sCAL;
+#endif
+   png_byte chunk_data[64];
+   png_size_t width_len, height_len, chunk_len;
+   png_bytep height_dst;
+
+   png_debug(1, "in png_write_sCAL_s\n");
+
+   width_len = png_strlen(width);
+   height_len = png_strlen(height);
+
+   /* unit byte + width + separator + height */
+   chunk_len = width_len + height_len + 2;
+   if (chunk_len > 64)
+   {
+      png_warning(png_ptr, "Can't write sCAL (buffer too small)");
+      return;
+   }
+
+   chunk_data[0] = (png_byte)unit;
+
+   /* copy width together with its '\0', which serves as the separator */
+   png_memcpy(chunk_data + 1, width, width_len + 1);
+
+   /* height follows immediately and is copied without its '\0' */
+   height_dst = chunk_data + width_len + 2;
+   png_memcpy(height_dst, height, height_len);
+
+   png_debug1(3, "sCAL total length = %u\n", (unsigned int)chunk_len);
+   png_write_chunk(png_ptr, png_sCAL, chunk_data, chunk_len);
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+/* Write the pHYs (physical pixel dimensions) chunk: x_pixels_per_unit and
+ * y_pixels_per_unit as unsigned 32-bit values followed by one unit_type
+ * byte.  An unrecognized unit type only produces a warning; the chunk is
+ * written regardless.
+ */
+void /* PRIVATE */
+png_write_pHYs(png_structp png_ptr, png_uint_32 x_pixels_per_unit,
+   png_uint_32 y_pixels_per_unit,
+   int unit_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_pHYs;
+#endif
+   png_byte chunk_data[9];
+
+   png_debug(1, "in png_write_pHYs\n");
+
+   if (!(unit_type < PNG_RESOLUTION_LAST))
+      png_warning(png_ptr, "Unrecognized unit type for pHYs chunk");
+
+   /* layout: bytes 0-3 x, bytes 4-7 y, byte 8 unit */
+   chunk_data[8] = (png_byte)unit_type;
+   png_save_uint_32(chunk_data + 4, y_pixels_per_unit);
+   png_save_uint_32(chunk_data, x_pixels_per_unit);
+
+   png_write_chunk(png_ptr, png_pHYs, chunk_data, (png_size_t)9);
+}
+#endif
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+/* Write the tIME chunk.  Use either png_convert_from_struct_tm()
+ * or png_convert_from_time_t(), or fill in the structure yourself.
+ *
+ * Layout written: 16-bit year, then one byte each for month, day, hour,
+ * minute and second.  Every field except the year is range-checked
+ * (month 1-12, day 1-31, hour 0-23, minute 0-59, second 0-60 to allow for
+ * a leap second); if any is out of range a warning is issued and nothing
+ * is written.
+ */
+void /* PRIVATE */
+png_write_tIME(png_structp png_ptr, png_timep mod_time)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tIME;
+#endif
+   png_byte buf[7];
+
+   png_debug(1, "in png_write_tIME\n");
+   if (mod_time->month  > 12 || mod_time->month  < 1 ||
+       mod_time->day    > 31 || mod_time->day    < 1 ||
+       mod_time->hour   > 23 || mod_time->minute > 59 ||
+       mod_time->second > 60)
+   {
+      png_warning(png_ptr, "Invalid time specified for tIME chunk");
+      return;
+   }
+
+   png_save_uint_16(buf, mod_time->year);
+   buf[2] = mod_time->month;
+   buf[3] = mod_time->day;
+   buf[4] = mod_time->hour;
+   buf[5] = mod_time->minute;
+   buf[6] = mod_time->second;
+
+   png_write_chunk(png_ptr, png_tIME, buf, (png_size_t)7);
+}
+#endif
+
+/* Initializes the row writing capability of libpng.
+ *
+ * Allocates the row buffer and, unless PNG_NO_WRITE_FILTERING is defined,
+ * one scratch row for each filter selected in png_ptr->do_filter, plus a
+ * zeroed previous-row buffer when the UP, AVG or PAETH filter is in use.
+ * It then sets num_rows and usr_width: the dimensions of the first
+ * interlace pass when libpng itself is interlacing, the full image
+ * dimensions otherwise.  Finally it points the zlib output at zbuf.
+ */
+void /* PRIVATE */
+png_write_start_row(png_structp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+#endif
+
+   png_size_t buf_size;
+
+   png_debug(1, "in png_write_start_row\n");
+   /* one full-width row at the user's depth, plus the filter-type byte */
+   buf_size = (png_size_t)(PNG_ROWBYTES(
+      png_ptr->usr_channels*png_ptr->usr_bit_depth,png_ptr->width)+1);
+
+   /* set up row buffer */
+   png_ptr->row_buf = (png_bytep)png_malloc(png_ptr, (png_uint_32)buf_size);
+   png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
+
+#ifndef PNG_NO_WRITE_FILTERING
+   /* set up filtering buffer, if using this filter */
+   if (png_ptr->do_filter & PNG_FILTER_SUB)
+   {
+      png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
+         (png_ptr->rowbytes + 1));
+      png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB;
+   }
+
+   /* We only need to keep the previous row if we are using one of these. */
+   if (png_ptr->do_filter & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH))
+   {
+     /* set up previous row buffer */
+      png_ptr->prev_row = (png_bytep)png_malloc(png_ptr, (png_uint_32)buf_size);
+      png_memset(png_ptr->prev_row, 0, buf_size);
+
+      if (png_ptr->do_filter & PNG_FILTER_UP)
+      {
+         png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
+      }
+
+      if (png_ptr->do_filter & PNG_FILTER_AVG)
+      {
+         png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
+      }
+
+      if (png_ptr->do_filter & PNG_FILTER_PAETH)
+      {
+         png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
+      }
+   }
+   /* the closing brace above belongs to the guarded "if", so it has to sit
+    * inside the conditional section for the braces to balance when
+    * filtering is compiled out
+    */
+#endif /* PNG_NO_WRITE_FILTERING */
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* if interlaced, we need to set up width and height of pass */
+   if (png_ptr->interlaced)
+   {
+      if (!(png_ptr->transformations & PNG_INTERLACE))
+      {
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+            png_pass_ystart[0]) / png_pass_yinc[0];
+         png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 -
+            png_pass_start[0]) / png_pass_inc[0];
+      }
+      else
+      {
+         png_ptr->num_rows = png_ptr->height;
+         png_ptr->usr_width = png_ptr->width;
+      }
+   }
+   else
+#endif
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->usr_width = png_ptr->width;
+   }
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+}
+
+/* Internal use only.  Called when finished processing a row of data.
+ *
+ * Advances row_number and returns at once while rows remain in the current
+ * pass.  At the end of a pass of an interlaced image it moves on to the
+ * next pass, clears prev_row and returns.  After the last row of the image
+ * it finishes the zlib stream, writes whatever compressed data is still
+ * pending as IDAT and resets the compressor.
+ */
+void /* PRIVATE */
+png_write_finish_row(png_structp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+#endif
+
+   int ret;   /* status returned by deflate() */
+
+   png_debug(1, "in png_write_finish_row\n");
+   /* next row */
+   png_ptr->row_number++;
+
+   /* see if we are done with the current pass (or the whole image when it
+      is not interlaced) */
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* if interlaced, go to next pass */
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      if (png_ptr->transformations & PNG_INTERLACE)
+      {
+         png_ptr->pass++;   /* num_rows and usr_width are left unchanged here */
+      }
+      else
+      {
+         /* loop until we find a non-zero width or height pass, recomputing
+            the pass dimensions from the interlace tables each time */
+         do
+         {
+            png_ptr->pass++;
+            if (png_ptr->pass >= 7)
+               break;
+            png_ptr->usr_width = (png_ptr->width +
+               png_pass_inc[png_ptr->pass] - 1 -
+               png_pass_start[png_ptr->pass]) /
+               png_pass_inc[png_ptr->pass];
+            png_ptr->num_rows = (png_ptr->height +
+               png_pass_yinc[png_ptr->pass] - 1 -
+               png_pass_ystart[png_ptr->pass]) /
+               png_pass_yinc[png_ptr->pass];
+            if (png_ptr->transformations & PNG_INTERLACE)   /* NOTE(review): this branch is only entered with PNG_INTERLACE clear, so the test looks unreachable - confirm */
+               break;
+         } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0);
+
+      }
+
+      /* reset the row above the image for the next pass; once pass reaches
+         7 all passes are done and control falls through to the flush */
+      if (png_ptr->pass < 7)
+      {
+         if (png_ptr->prev_row != NULL)
+            png_memset(png_ptr->prev_row, 0,
+               (png_size_t)(PNG_ROWBYTES(png_ptr->usr_channels*
+               png_ptr->usr_bit_depth,png_ptr->width))+1);
+         return;
+      }
+   }
+#endif
+
+   /* if we get here, we've just written the last row, so we need
+      to flush the compressor */
+   do
+   {
+      /* tell the compressor we are done */
+      ret = deflate(&png_ptr->zstream, Z_FINISH);
+      /* check for an error; Z_OK means there is more output still to come */
+      if (ret == Z_OK)
+      {
+         /* check to see if we need more room: when the output buffer is
+            full, write it out as an IDAT chunk and start refilling it */
+         if (!(png_ptr->zstream.avail_out))
+         {
+            png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+            png_ptr->zstream.next_out = png_ptr->zbuf;
+            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+         }
+      }
+      else if (ret != Z_STREAM_END)
+      {
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+   } while (ret != Z_STREAM_END);
+
+   /* write any extra space, i.e. the part of zbuf that was filled after
+      the last full buffer was written */
+   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
+   {
+      png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size -
+         png_ptr->zstream.avail_out);
+   }
+
+   deflateReset(&png_ptr->zstream);   /* leave the compressor ready for reuse */
+   png_ptr->zstream.data_type = Z_BINARY;
+}
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Pick out the correct pixels for the interlace pass.
+ * The basic idea here is to go through the row with a source
+ * pointer and a destination pointer (sp and dp), and copy the
+ * correct pixels for the pass.  As the row gets compacted,
+ * sp will always be >= dp, so we should never overwrite anything.
+ * See the default: case for the easiest code to understand.
+ */
+void /* PRIVATE */
+png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* per-pass lookup tables - indexed by pass (0 - 6) */
+
+   /* column of the first pixel that belongs to the pass */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* column distance between consecutive pixels of the pass */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+#endif
+
+   png_debug(1, "in png_do_write_interlace\n");
+   /* the final pass (6) keeps every pixel, so nothing has to be packed */
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && pass < 6)
+#else
+   if (pass < 6)
+#endif
+   {
+      /* sub-byte depths are repacked bit by bit, the rest are copied */
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep src;
+            png_bytep dst = row;
+            int out_shift = 7;
+            int packed = 0;
+            int bit;
+            png_uint_32 col;
+            png_uint_32 width = row_info->width;
+
+            /* Gather one bit per selected pixel; every output byte is
+             * filled from its most significant bit downwards.
+             */
+            for (col = png_pass_start[pass]; col < width;
+               col += png_pass_inc[pass])
+            {
+               src = row + (png_size_t)(col >> 3);
+               bit = (int)(*src >> (7 - (int)(col & 0x07))) & 0x01;
+               packed |= (bit << out_shift);
+
+               if (out_shift != 0)
+                  out_shift--;
+               else
+               {
+                  out_shift = 7;
+                  *dst++ = (png_byte)packed;
+                  packed = 0;
+               }
+
+            }
+            if (out_shift != 7)
+               *dst = (png_byte)packed;
+            break;
+         }
+         case 2:
+         {
+            png_bytep src;
+            png_bytep dst = row;
+            int out_shift = 6;
+            int packed = 0;
+            int pair;
+            png_uint_32 col;
+            png_uint_32 width = row_info->width;
+
+            /* Same scheme as the 1-bit case, but two bits per pixel,
+             * i.e. four pixels per output byte.
+             */
+            for (col = png_pass_start[pass]; col < width;
+               col += png_pass_inc[pass])
+            {
+               src = row + (png_size_t)(col >> 2);
+               pair = (*src >> ((3 - (int)(col & 0x03)) << 1)) & 0x03;
+               packed |= (pair << out_shift);
+
+               if (out_shift != 0)
+                  out_shift -= 2;
+               else
+               {
+                  out_shift = 6;
+                  *dst++ = (png_byte)packed;
+                  packed = 0;
+               }
+            }
+            if (out_shift != 6)
+               *dst = (png_byte)packed;
+            break;
+         }
+         case 4:
+         {
+            png_bytep src;
+            png_bytep dst = row;
+            int out_shift = 4;
+            int packed = 0;
+            int nibble;
+            png_uint_32 col;
+            png_uint_32 width = row_info->width;
+
+            /* Four bits per pixel: the high nibble of each output byte
+             * is filled first, then the low nibble.
+             */
+            for (col = png_pass_start[pass]; col < width;
+               col += png_pass_inc[pass])
+            {
+               src = row + (png_size_t)(col >> 1);
+               nibble = (*src >> ((1 - (int)(col & 0x01)) << 2)) & 0x0f;
+               packed |= (nibble << out_shift);
+
+               if (out_shift != 0)
+                  out_shift -= 4;
+               else
+               {
+                  out_shift = 4;
+                  *dst++ = (png_byte)packed;
+                  packed = 0;
+               }
+            }
+            if (out_shift != 4)
+               *dst = (png_byte)packed;
+            break;
+         }
+         default:
+         {
+            png_bytep src;
+            png_bytep dst = row;
+            png_uint_32 col;
+            png_uint_32 width = row_info->width;
+            png_size_t bytes_per_pixel = (row_info->pixel_depth >> 3);
+
+            /* Pixels occupy whole bytes here, so every selected pixel
+             * is simply slid down to the next free slot at the front
+             * of the row.  The source position is never before the
+             * destination, so no pixel that is still needed is
+             * overwritten.
+             */
+            for (col = png_pass_start[pass]; col < width;
+               col += png_pass_inc[pass])
+            {
+               /* locate the pixel inside the full-width row */
+               src = row + (png_size_t)col * bytes_per_pixel;
+               /* copy it unless it already sits in place */
+               if (src != dst)
+                  png_memcpy(dst, src, bytes_per_pixel);
+               /* advance to the next output slot */
+               dst += bytes_per_pixel;
+            }
+            break;
+         }
+      }
+      /* shrink the row description to the pixels kept for this pass */
+      row_info->width =
+         (row_info->width + png_pass_inc[pass] - 1 -
+         png_pass_start[pass]) /
+         png_pass_inc[pass];
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+         row_info->width);
+   }
+}
+#endif
+
+/* Filters png_ptr->row_buf with every filter enabled in png_ptr->do_filter,
+ * keeps the candidate with the smallest (optionally weighted) sum of absolute
+ * differences and writes it; row_info supplies rowbytes and pixel_depth.
+ */
+#define PNG_MAXSUM (((png_uint_32)(-1)) >> 1)
+#define PNG_HISHIFT 10
+#define PNG_LOMASK ((png_uint_32)0xffffL)
+#define PNG_HIMASK ((png_uint_32)(~PNG_LOMASK >> PNG_HISHIFT))
+void /* PRIVATE */
+png_write_find_filter(png_structp png_ptr, png_row_infop row_info)
+{
+   png_bytep best_row;
+#ifndef PNG_NO_WRITE_FILTER
+   png_bytep prev_row, row_buf;
+   png_uint_32 mins, bpp;
+   png_byte filter_to_do = png_ptr->do_filter;
+   png_uint_32 row_bytes = row_info->rowbytes;
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   int num_p_filters = (int)png_ptr->num_prev_filters;
+#endif
+
+   png_debug(1, "in png_write_find_filter\n");
+   /* find out how many bytes offset each pixel is */
+   bpp = (row_info->pixel_depth + 7) >> 3;
+
+   prev_row = png_ptr->prev_row;
+#endif
+   best_row = png_ptr->row_buf;
+#ifndef PNG_NO_WRITE_FILTER
+   row_buf = best_row;
+   mins = PNG_MAXSUM;
+
+   /* The prediction method we use is to find which method provides the
+    * smallest value when summing the absolute values of the distances
+    * from zero, using anything >= 128 as negative numbers.  This is known
+    * as the "minimum sum of absolute differences" heuristic.  Other
+    * heuristics are the "weighted minimum sum of absolute differences"
+    * (experimental and can in theory improve compression), and the "zlib
+    * predictive" method (not implemented yet), which does test compressions
+    * of lines using different filter methods, and then chooses the
+    * (series of) filter(s) that give minimum compressed data size (VERY
+    * computationally expensive).
+    *
+    * GRR 980525:  consider also
+    *   (1) minimum sum of absolute differences from running average (i.e.,
+    *       keep running sum of non-absolute differences & count of bytes)
+    *       [track dispersion, too?  restart average if dispersion too large?]
+    *  (1b) minimum sum of absolute differences from sliding average, probably
+    *       with window size <= deflate window (usually 32K)
+    *   (2) minimum sum of squared differences from zero or running average
+    *       (i.e., ~ root-mean-square approach)
+    */
+
+
+   /* We don't need to test the 'no filter' case if this is the only filter
+    * that has been chosen, as it doesn't actually do anything to the data.
+    */
+   if ((filter_to_do & PNG_FILTER_NONE) &&
+       filter_to_do != PNG_FILTER_NONE)
+   {
+      png_bytep rp;
+      png_uint_32 sum = 0;
+      png_uint_32 i;
+      int v;
+
+      for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++)
+      {
+         v = *rp;
+         sum += (v < 128) ? v : 256 - v;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         png_uint_32 sumhi, sumlo;
+         int j;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; /* Gives us some footroom */
+
+         /* Reduce the sum if we match any of the previous rows */
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         /* Factor in the cost of this filter (this is here for completeness,
+          * but it makes no sense to have a "cost" for the NONE filter, as
+          * it has the minimum possible computational cost - none).
+          */
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+      mins = sum;
+   }
+
+   /* sub filter */
+   if (filter_to_do == PNG_FILTER_SUB)
+   /* it's the only filter so no testing is needed */
+   {
+      png_bytep rp, lp, dp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
+           i++, rp++, dp++)
+      {
+         *dp = *rp;
+      }
+      for (lp = row_buf + 1; i < row_bytes;
+         i++, rp++, lp++, dp++)
+      {
+         *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+      }
+      best_row = png_ptr->sub_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_SUB)
+   {
+      png_bytep rp, dp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      /* We temporarily increase the "minimum sum" by the factor we
+       * would reduce the sum of this filter, so that we can do the
+       * early exit comparison without scaling the sum each time.
+       */
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
+           i++, rp++, dp++)
+      {
+         v = *dp = *rp;
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+      for (lp = row_buf + 1; i < row_bytes;
+         i++, rp++, lp++, dp++)
+      {
+         v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+         /* The sum is scaled by the forward weights/costs (the inverse
+          * ones were applied to lmins above), as for the other filters. */
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->sub_row;
+      }
+   }
+
+   /* up filter */
+   if (filter_to_do == PNG_FILTER_UP)
+   {
+      png_bytep rp, dp, pp;
+      png_uint_32 i;
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
+           pp = prev_row + 1; i < row_bytes;
+           i++, rp++, pp++, dp++)
+      {
+         *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
+      }
+      best_row = png_ptr->up_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_UP)
+   {
+      png_bytep rp, dp, pp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
+           pp = prev_row + 1; i < row_bytes; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->up_row;
+      }
+   }
+
+   /* avg filter */
+   if (filter_to_do == PNG_FILTER_AVG)
+   {
+      png_bytep rp, dp, pp, lp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+      }
+      for (lp = row_buf + 1; i < row_bytes; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
+                 & 0xff);
+      }
+      best_row = png_ptr->avg_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_AVG)
+   {
+      png_bytep rp, dp, pp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_AVG)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+      for (lp = row_buf + 1; i < row_bytes; i++)
+      {
+         v = *dp++ =
+          (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+         /* Weight by the previous rows that used AVG (same test as above) */
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_AVG)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->avg_row;
+      }
+   }
+
+   /* Paeth filter */
+   if (filter_to_do == PNG_FILTER_PAETH)
+   {
+      png_bytep rp, dp, pp, cp, lp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+      }
+
+      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
+      {
+         int a, b, c, pa, pb, pc, p;
+
+         b = *pp++;
+         c = *cp++;
+         a = *lp++;
+
+         p = b - c;
+         pc = a - c;
+
+#ifdef PNG_USE_ABS
+         pa = abs(p);
+         pb = abs(pc);
+         pc = abs(p + pc);
+#else
+         pa = p < 0 ? -p : p;
+         pb = pc < 0 ? -pc : pc;
+         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+         *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+      }
+      best_row = png_ptr->paeth_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_PAETH)
+   {
+      png_bytep rp, dp, pp, cp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+
+      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
+      {
+         int a, b, c, pa, pb, pc, p;
+
+         b = *pp++;
+         c = *cp++;
+         a = *lp++;
+
+#ifndef PNG_SLOW_PAETH
+         p = b - c;
+         pc = a - c;
+#ifdef PNG_USE_ABS
+         pa = abs(p);
+         pb = abs(pc);
+         pc = abs(p + pc);
+#else
+         pa = p < 0 ? -p : p;
+         pb = pc < 0 ? -pc : pc;
+         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+#else /* PNG_SLOW_PAETH */
+         p = a + b - c;
+         pa = abs(p - a);
+         pb = abs(p - b);
+         pc = abs(p - c);
+         if (pa <= pb && pa <= pc)
+            p = a;
+         else if (pb <= pc)
+            p = b;
+         else
+            p = c;
+#endif /* PNG_SLOW_PAETH */
+
+         v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         best_row = png_ptr->paeth_row;
+      }
+   }
+#endif /* PNG_NO_WRITE_FILTER */
+   /* Do the actual writing of the filtered row data from the chosen filter. */
+
+   png_write_filtered_row(png_ptr, best_row);
+
+#ifndef PNG_NO_WRITE_FILTER
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   /* Age the filter history by one slot (oldest entry drops off the end,
+    * iterating downwards so nothing is clobbered) and record this row's
+    * filter type in slot 0; all indices stay below num_prev_filters. */
+   if (png_ptr->num_prev_filters > 0)
+   {
+      int j;
+      for (j = num_p_filters - 1; j > 0; j--)
+         png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1];
+      png_ptr->prev_filters[0] = best_row[0];
+   }
+#endif
+#endif /* PNG_NO_WRITE_FILTER */
+}
+
+
+/* Do the actual writing of a previously filtered row. */
+void /* PRIVATE */
+png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row)
+{
+   png_debug(1, "in png_write_filtered_row\n");
+   png_debug1(2, "filter = %d\n", filtered_row[0]);
+   /* hand zlib the filter byte followed by rowbytes of row data */
+
+   png_ptr->zstream.next_in = filtered_row;
+   png_ptr->zstream.avail_in = (uInt)png_ptr->row_info.rowbytes + 1;
+   /* keep deflating until zlib has consumed the whole input */
+   do
+   {
+      int status; /* zlib return code */
+
+      /* feed zlib without forcing a flush */
+      status = deflate(&png_ptr->zstream, Z_NO_FLUSH);
+      /* anything other than Z_OK is reported through png_error */
+      if (status != Z_OK)
+      {
+         if (png_ptr->zstream.msg == NULL)
+            png_error(png_ptr, "zlib error");
+         else
+            png_error(png_ptr, png_ptr->zstream.msg);
+      }
+
+      /* a full output buffer means it is time for another IDAT */
+      if (png_ptr->zstream.avail_out == 0)
+      {
+         /* emit the buffer as an IDAT, then point zlib back at its start */
+         png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+         png_ptr->zstream.next_out = png_ptr->zbuf;
+         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+      }
+   /* loop while input remains */
+   } while (png_ptr->zstream.avail_in != 0);
+
+   /* exchange the current and previous row buffers, if there is a previous */
+   if (png_ptr->prev_row != NULL)
+   {
+      png_bytep old_prev = png_ptr->prev_row;
+
+      /* the row just written becomes the previous row */
+      png_ptr->prev_row = png_ptr->row_buf;
+      png_ptr->row_buf = old_prev;
+   }
+
+   /* finish row - updates counters and flushes zlib if last row */
+   png_write_finish_row(png_ptr);
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_ptr->flush_rows++;
+
+   if (png_ptr->flush_dist > 0 &&
+       png_ptr->flush_dist <= png_ptr->flush_rows)
+   {
+      png_write_flush(png_ptr);
+   }
+#endif
+}
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.cpp b/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.cpp
new file mode 100644
index 0000000..c958b62
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.cpp
@@ -0,0 +1,46 @@
+//
+//  ExecuteEntryFunctions.cpp
+//  PoissonRecon
+//
+//  Created by Aaron Thompson on 2/13/19.
+//  Copyright © 2019 Standard Cyborg. All rights reserved.
+//
+
+#include "ExecuteEntryFunctions.hpp"
+#include "PoissonReconExecute.hpp"
+#include "SurfaceTrimmerExecute.hpp"
+
+/**
+ Why have these together instead of moving them into their respective *Execute source files?
+ - There's a conflict between the OS-provided Point struct type and this library's Point,
+   so you can't include both together.
+ - If the compiler doesn't specialize both at the same time, it ends up with duplicate symbols
+   when linking because it didn't know to name them differently (I think?)
+ */
+
+void PoissonReconExecute(const char *inputFilePath,
+                         const char *outputFilePath,
+                         bool closed,
+                         PoissonReconParameters parameters,
+                         std::function<bool (float)> progressHandler)
+{
+    // The two FEM signatures differ only in boundary type (Dirichlet vs. Neumann).
+    using DirichletSignature = IsotropicUIntPack<3, FEMDegreeAndBType<1, BOUNDARY_DIRICHLET>::Signature>;
+    using NeumannSignature = IsotropicUIntPack<3, FEMDegreeAndBType<1, BOUNDARY_NEUMANN>::Signature>;
+    using VertexColor = PointStreamColor<float>;
+    if (!closed) {
+        _PoissonReconExecute<VertexColor>(inputFilePath, outputFilePath, parameters, progressHandler, NeumannSignature());
+        return;
+    }
+    _PoissonReconExecute<VertexColor>(inputFilePath, outputFilePath, parameters, progressHandler, DirichletSignature());
+}
+
+int SurfaceTrimmerExecute(const char* inputFilePath,
+                          const char* outputFilePath,
+                          SurfaceTrimmerParameters parameters,
+                          std::function<bool (float)> progressHandler)
+{
+    // Forward to the templated implementation with the normal and color stream types.
+    return _SurfaceTrimmerExecute<PointStreamNormal<float, 3>, PointStreamColor<float>>(
+        inputFilePath, outputFilePath, parameters, progressHandler);
+}
diff --git a/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.hpp b/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.hpp
new file mode 100644
index 0000000..a2007c1
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/ExecuteEntryFunctions.hpp
@@ -0,0 +1,30 @@
+//
+//  ExecuteEntryFunctions.hpp
+//  PoissonRecon
+//
+//  Created by Aaron Thompson on 2/13/19.
+//  Copyright © 2019 Standard Cyborg. All rights reserved.
+//
+
+#ifndef ExecuteEntryFunctions_hpp
+#define ExecuteEntryFunctions_hpp
+
+#include "Parameters.hpp"
+#include <functional>
+
+/** @param progressHandler Reports progress, from 0-1. The handler should return false if progress should stop, i.e. it was canceled.
+    @param closed If true, uses Dirichlet boundary parameters instead of Neumann.
+ */
+extern void PoissonReconExecute(const char *inputFilePath,
+                                const char *outputFilePath,
+                                bool closed,
+                                PoissonReconParameters parameters,
+                                std::function<bool (float)> progressHandler);
+
+/** Returns 0 on success, nonzero on error */
+extern int SurfaceTrimmerExecute(const char* inputFilePath,
+                                 const char* outputFilePath,
+                                 SurfaceTrimmerParameters parameters,
+                                 std::function<bool (float)> progressHandler);
+
+#endif /* ExecuteEntryFunctions_hpp */
diff --git a/PoissonRecon-Xcode/Meshing/Info.plist b/PoissonRecon-Xcode/Meshing/Info.plist
new file mode 100644
index 0000000..1007fd9
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/Info.plist
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>$(CURRENT_PROJECT_VERSION)</string>
+	<key>NSPrincipalClass</key>
+	<string></string>
+</dict>
+</plist>
diff --git a/PoissonRecon-Xcode/Meshing/Meshing.h b/PoissonRecon-Xcode/Meshing/Meshing.h
new file mode 100644
index 0000000..2da1f62
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/Meshing.h
@@ -0,0 +1,17 @@
+//
+//  Meshing.h
+//  Meshing
+//
+//  Created by Aaron Thompson on 4/26/18.
+//  Copyright © 2018 Standard Cyborg. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+
+//! Project version number for Meshing.
+FOUNDATION_EXPORT double MeshingVersionNumber;
+
+//! Project version string for Meshing.
+FOUNDATION_EXPORT const unsigned char MeshingVersionString[];
+
+#import <Meshing/MeshingOperation.h>
diff --git a/PoissonRecon-Xcode/Meshing/MeshingOperation.h b/PoissonRecon-Xcode/Meshing/MeshingOperation.h
new file mode 100644
index 0000000..8a9fae5
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/MeshingOperation.h
@@ -0,0 +1,45 @@
+//
+//  MeshingOperation.h
+//  Meshing
+//
+//  Created by Aaron Thompson on 4/26/18.
+//  Copyright © 2018 Standard Cyborg. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+
+/** Reconstructs a mesh from a point cloud and trims the edges on the result. */
+@interface MeshingOperation : NSOperation
+
+- (instancetype)init NS_UNAVAILABLE;
+
+- (instancetype)initWithInputFilePath:(NSString *)inputPath
+                       outputFilePath:(NSString *)outputPath;
+
+/** Set this to be informed about the progress of the meshing operation.
+    @param progress From 0.0-1.0
+ */
+@property (nonatomic, copy) void (^progressHandler)(float progress);
+
+/** The resolution of the reconstructed mesh vertices.
+    Higher values will result in more vertices per mesh,
+    and also take longer to reconstruct.
+    Range is 1-10, default is 5.
+ */
+@property (nonatomic) int resolution;
+
+/** The smoothness of the reconstructed mesh vertex positions.
+    Range is 1-10, default is 2.
+ */
+@property (nonatomic) int smoothness;
+
+/** The amount of surface trimming for low-density mesh regions.
+    Range is 0-10, defaults to 5; higher numbers trim more away; 0 = don't trim.
+ */
+@property (nonatomic) int surfaceTrimmingAmount;
+
+/** If YES, attempts to build a closed mesh.
+ */
+@property (nonatomic) BOOL closed;
+
+@end
diff --git a/PoissonRecon-Xcode/Meshing/MeshingOperation.mm b/PoissonRecon-Xcode/Meshing/MeshingOperation.mm
new file mode 100644
index 0000000..38e7247
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/MeshingOperation.mm
@@ -0,0 +1,96 @@
+//
+//  MeshingOperation.mm
+//  Meshing
+//
+//  Created by Aaron Thompson on 4/26/18.
+//  Copyright © 2018 Standard Cyborg. All rights reserved.
+//
+
+#import <Meshing/MeshingOperation.h>
+#import "Parameters.hpp"
+#import "ExecuteEntryFunctions.hpp"
+
+using namespace std;
+
+/** Limits `value` to the closed interval [min, max].
+    The upper bound is applied first, then the lower bound.
+ */
+static float fclampf(float value, float min, float max) {
+    const float cappedAbove = fminf(value, max);
+    const float clamped = fmaxf(cappedAbove, min);
+    return clamped;
+}
+
+/** Linearly maps `value` from [originalMin, originalMax] onto [newMin, newMax],
+    then clamps the result to [newMin, newMax].
+ */
+static float remapAndClamp(float value, float originalMin, float originalMax, float newMin, float newMax) {
+    const float originalSpan = originalMax - originalMin;
+    const float newSpan = newMax - newMin;
+
+    // Offset from the old lower bound, rescaled into the new span
+    const float offset = value - originalMin;
+    const float rescaled = offset * newSpan / originalSpan;
+    const float remapped = rescaled + newMin;
+
+    return fclampf(remapped, newMin, newMax);
+}
+
+// Share of the overall 0-1 progress range reported while Poisson reconstruction runs;
+// surface trimming reports the remainder, from this value up to 1.
+static const float kPoissonProgressFraction = 0.75;
+
+@implementation MeshingOperation {
+    NSString *_inputFilePath;
+    NSString *_outputFilePath;
+}
+
+/** Stores the file paths and applies the defaults documented in the header. */
+- (instancetype)initWithInputFilePath:(NSString *)inputPath
+                       outputFilePath:(NSString *)outputPath
+{
+    self = [super init];
+    if (self) {
+        // Copy so a caller-owned NSMutableString cannot change underneath the operation
+        _inputFilePath = [inputPath copy];
+        _outputFilePath = [outputPath copy];
+
+        _resolution = 5;
+        _smoothness = 2;
+        // The header documents a default of 5; without this the ivar stays 0 (trimming disabled)
+        _surfaceTrimmingAmount = 5;
+
+        _progressHandler = ^(float){};
+    }
+    return self;
+}
+
+/** Runs reconstruction into a temporary file, then trims (or moves) it to the output path.
+    Cancellation is checked before each stage and from every progress callback.
+    The temporary file is always removed before returning.
+ */
+- (void)main
+{
+    const char *inputPath = [_inputFilePath UTF8String];
+    NSString *tempFileName = [NSString stringWithFormat:@"poisson-%@.ply", [[NSUUID UUID] UUIDString]];
+    NSString *tempPoissonOutputPathString = [NSTemporaryDirectory() stringByAppendingPathComponent:tempFileName];
+    const char *poissonOutputPath = [tempPoissonOutputPathString UTF8String];
+    const char *surfaceTrimmerOutputPath = [_outputFilePath UTF8String];
+
+    // Map the public 1-10 settings onto the solver's native ranges
+    PoissonReconParameters poissonParams;
+    poissonParams.Depth = (int)remapAndClamp(_resolution, 1, 10, 4, 14);
+    poissonParams.SamplesPerNode = (int)remapAndClamp(_smoothness, 1, 10, 1, 15);
+
+    SurfaceTrimmerParameters surfaceTrimmerParams;
+    surfaceTrimmerParams.Trim = (int)remapAndClamp(_surfaceTrimmingAmount, 1, 10, 1, 10);
+
+    __weak MeshingOperation *weakSelf = self;
+
+    // Snapshot the handler; substitute a no-op so a nil progressHandler cannot crash the callbacks
+    void (^progressHandler)(float) = _progressHandler;
+    if (progressHandler == nil) {
+        progressHandler = ^(float){};
+    }
+
+    if (![weakSelf isCancelled]) {
+        PoissonReconExecute(inputPath, poissonOutputPath, _closed, poissonParams, [weakSelf, progressHandler](float progress) {
+            float adjustedProgress = remapAndClamp(progress, 0, 1, 0, kPoissonProgressFraction);
+
+            progressHandler(adjustedProgress);
+
+            // Returning false asks the reconstruction to stop
+            BOOL cancelled = [weakSelf isCancelled];
+            return !cancelled;
+        });
+    }
+
+    if (![weakSelf isCancelled]) {
+        if (_surfaceTrimmingAmount <= 0) {
+            // Trimming disabled, just move the file to the destination
+            NSError *moveError = nil;
+            BOOL moved = [[NSFileManager defaultManager] moveItemAtPath:tempPoissonOutputPathString
+                                                                 toPath:_outputFilePath
+                                                                  error:&moveError];
+            if (!moved) {
+                NSLog(@"MeshingOperation: could not move %@ to %@: %@", tempPoissonOutputPathString, _outputFilePath, moveError);
+            }
+        } else {
+            SurfaceTrimmerExecute(poissonOutputPath, surfaceTrimmerOutputPath, surfaceTrimmerParams, [weakSelf, progressHandler](float progress) {
+                float adjustedProgress = remapAndClamp(progress, 0, 1, kPoissonProgressFraction, 1);
+                progressHandler(adjustedProgress);
+
+                // Returning false asks the trimmer to stop
+                BOOL cancelled = [weakSelf isCancelled];
+                return !cancelled;
+            });
+        }
+    }
+
+    // Best-effort cleanup: the file may not exist (never written, or already moved), so the error is ignored
+    [[NSFileManager defaultManager] removeItemAtPath:tempPoissonOutputPathString error:NULL];
+}
+
+@end
diff --git a/PoissonRecon-Xcode/Meshing/Parameters.hpp b/PoissonRecon-Xcode/Meshing/Parameters.hpp
new file mode 100644
index 0000000..7950c8d
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/Parameters.hpp
@@ -0,0 +1,32 @@
+//
+//  Parameters.hpp
+//  PoissonRecon
+//
+//  Created by Aaron Thompson on 2/13/19.
+//  Copyright © 2019 Standard Cyborg. All rights reserved.
+//
+
+#ifndef Parameters_hpp
+#define Parameters_hpp
+
+// Tunable inputs for the Poisson reconstruction step.
+// The notes below describe how _PoissonReconExecute consumes each field.
+struct PoissonReconParameters {
+    // Copied to the solver's baseDepth setting
+    int BaseDepth = 0;
+    // Copied to the solver's baseVCycles setting
+    int BaseVCycles = 1;
+    // Copied to the solver's cgAccuracy setting
+    float CGSolverAccuracy = 0.001;
+    // Tree depth used when inserting the samples and as the solve depth;
+    // the density estimator uses Depth - 2
+    int Depth = 8;
+    // Passed to the tree's finalizeForMultigrid call
+    int FullDepth = 5;
+    // Copied to the solver's iters setting
+    int Iters = 8;
+    // Interpolation constraints are only added when this is greater than 0;
+    // it is multiplied by the accumulated point weight sum
+    int PointWeight = 2;
+    // Passed to the density estimator as the samples-per-node value
+    float SamplesPerNode = 1.5;
+    // When greater than 0, the bounding-box scale factor used to normalise the input points
+    float Scale = 1.1;
+    // NOTE(review): not read by _PoissonReconExecute as far as this file shows -- confirm whether it is used elsewhere
+    int Threads = 1;
+};
+
+// Tunable inputs for the surface trimming step.
+// NOTE(review): the fields presumably mirror the SurfaceTrimmer command-line
+// options of the same names -- confirm against SurfaceTrimmerExecute.
+struct SurfaceTrimmerParameters {
+    // Presumably the number of smoothing passes -- TODO confirm
+    int Smooth = 5;
+    // Trimming amount; MeshingOperation sets this from surfaceTrimmingAmount, clamped to 1-10
+    int Trim = 5;
+    // Presumably the relative area below which disconnected islands are handled specially -- TODO confirm
+    float IslandAreaRatio = 0.001;
+    // Presumably keeps polygons instead of triangulating the output -- TODO confirm
+    bool PolygonMesh = false;
+};
+
+#endif /* Parameters_hpp */
diff --git a/PoissonRecon-Xcode/Meshing/PoissonReconExecute.hpp b/PoissonRecon-Xcode/Meshing/PoissonReconExecute.hpp
new file mode 100644
index 0000000..6ee9249
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/PoissonReconExecute.hpp
@@ -0,0 +1,430 @@
+//
+//  PoissonReconExecute.hpp
+//  PoissonRecon
+//
+//  Created by Aaron Thompson on 5/8/18.
+//  Copyright © 2018 Standard Cyborg. All rights reserved.
+//
+
+/**
+ Example parameters for execution of
+ PoissonRecon --in Scan.ply --out PoissonRecon.ply --colors --normals --density --depth 12
+ {
+ ASCII: true,
+ BaseDepth: 0,
+ BaseVCycles: 1,
+ BoundaryNames: ["free", "Dirichlet", "Neumann"],
+ BType: 3,
+ CGSolverAccuracy: 0.00100000005,
+ Colors: true,
+ Confidence: 0,
+ ConfidenceBias: 0,
+ DataX: 32,
+ Degree: 1,
+ Density: true,
+ Depth: 12,
+ ExactInterpolation: false,
+ FEMTreeRealNames: ["float", "double"],
+ FullDepth: 5,
+ Grid: NULL,
+ In: "Scan.ply",
+ InCore: false,
+ Iters: 8,
+ KernelDepth: 0,
+ LinearFit: false,
+ MaxMemoryGB: 0,
+ messageWriter: {
+ outputFile: NULL,
+ echoSTDOUT: false,
+ },
+ NoComments: false,
+ NonManifold: false,
+ Normals: true,
+ Out: "PoissonRecon.ply",
+ Performance: false,
+ PointWeight: 2,
+ PolygonMesh: false,
+ PrimalGrid: false,
+ SamplesPerNode: 1.5,
+ Scale: 1.10000002,
+ ShowGlobalResidualNames: ["show none", "show last", "show all"],
+ ShowResidual: false,
+ TempDir: NULL,
+ Threads: 1,
+ Transform: NULL,
+ Tree: NULL,
+ Verbose: false,
+ Width: 0
+ }
+ */
+
+#include "MyMiscellany.h"
+#include "FEMTree.h"
+#include "Ply.h"
+#include "PointStreamData.h"
+
+// B-spline degrees and finite-element defaults shared by the reconstruction code below.
+// DATA_DEGREE, WEIGHT_DEGREE and NORMAL_DEGREE feed the signature and density-estimator typedefs in this file;
+// the two DEFAULT_FEM_* macros are not referenced in this file (presumably used by the caller -- confirm).
+#define DATA_DEGREE 0                            // The order of the B-Spline used to splat in data for color interpolation
+#define WEIGHT_DEGREE 2                            // The order of the B-Spline used to splat in the weights for density estimation
+#define NORMAL_DEGREE 2                            // The order of the B-Spline used to splat in the normals for constructing the Laplacian constraints
+#define DEFAULT_FEM_DEGREE 1                    // The default finite-element degree
+#define DEFAULT_FEM_BOUNDARY BOUNDARY_NEUMANN    // The default finite-element boundary type
+
+/**
+ Builds the transform that normalises an axis-aligned bounding box.
+
+ The longest side of the box, multiplied by `scaleFactor`, becomes the edge length of a cube
+ centred on the box. The result is the product of a uniform scale by 1 / edge length and a
+ translation by minus the cube's minimum corner (intended to map that cube onto the unit cube).
+
+ Declared `static inline` because this header defines the function and has no include guard:
+ a plain definition would be emitted as an external symbol by every translation unit that includes it.
+
+ @param min Minimum corner of the bounding box.
+ @param max Maximum corner of the bounding box.
+ @param scaleFactor Multiplier applied to the longest side before normalising.
+ @return The combined scale * translation transform.
+ */
+static inline XForm<float, 3 + 1> GetBoundingBoxXForm(Point<float, 3> min, Point<float, 3> max, float scaleFactor)
+{
+    Point<float, 3> center = (max + min) / 2;
+
+    // Longest side of the box
+    float scale = max[0] - min[0];
+    for (int d = 1; d < 3; d++) {
+        scale = std::max<float>(scale, max[d] - min[d]);
+    }
+
+    scale *= scaleFactor;
+
+    // Step from the centre to the minimum corner of the scaled cube
+    for (int i = 0; i < 3; i++) {
+        center[i] -= scale / 2;
+    }
+
+    auto tXForm = XForm<float, 3 + 1>::Identity();
+    auto sXForm = XForm<float, 3 + 1>::Identity();
+
+    for (int i = 0; i < 3; i++) {
+        sXForm(i, i) = 1.0f / scale;
+        tXForm(3, i) = -center[i];
+    }
+
+    return sXForm * tXForm;
+}
+
+/**
+ Computes the normalising transform for all points of a stream.
+
+ Queries the stream for its bounding box and forwards it to GetBoundingBoxXForm.
+
+ Declared `static inline` because this header defines the function and has no include guard:
+ a plain definition would be emitted as an external symbol by every translation unit that includes it.
+
+ @param stream Point stream whose bounding box is measured.
+ @param scaleFactor Multiplier forwarded to GetBoundingBoxXForm.
+ @return The transform produced by GetBoundingBoxXForm for the stream's bounding box.
+ */
+static inline XForm<float, 4> GetPointXForm(InputPointStream<float, 3>& stream, float scaleFactor)
+{
+    Point<float, 3> min, max;
+    stream.boundingBox(min, max);
+    return GetBoundingBoxXForm(min, max, scaleFactor);
+}
+
+// Functor producing the constraint value for a sample: always target * weight,
+// independent of the sample position.
+struct ConstraintDual
+{
+    float target;
+    float weight;
+
+    ConstraintDual(float targetValue, float sampleWeight)
+    : target(targetValue), weight(sampleWeight)
+    { }
+
+    // The position argument is accepted for interface compatibility but not used
+    CumulativeDerivativeValues<float, 3, 0> operator()(const Point<float, 3>&) const
+    {
+        const float weightedTarget = target * weight;
+        return CumulativeDerivativeValues<float, 3, 0>(weightedTarget);
+    }
+};
+
+// Functor scaling the derivative values of a sample by a fixed weight,
+// independent of the sample position. Provided for float and double values.
+struct SystemDual
+{
+    float weight;
+
+    SystemDual(float sampleWeight) : weight(sampleWeight) { }
+
+    // Single-precision values; the position argument is not used
+    CumulativeDerivativeValues<float, 3, 0> operator()(const Point<float, 3>&,
+                                                       const CumulativeDerivativeValues<float, 3, 0>& dValues) const
+    {
+        return dValues * weight;
+    }
+
+    // Double-precision values; the position argument is not used
+    CumulativeDerivativeValues<double, 3, 0> operator()(const Point<float, 3>&,
+                                                        const CumulativeDerivativeValues<double, 3, 0>& dValues) const
+    {
+        return dValues * weight;
+    }
+};
+
+/**
+ Extracts the iso-surface of `solution` at `isoValue` and writes it to `Out` as an ASCII PLY file.
+
+ The mesh is accumulated in a CoredFileMeshData whose backing files use the prefix
+ "<temp directory>/PR_". When `sampleData` is non-NULL it is splatted into the tree and handed to
+ the extractor so that SetVertex receives per-vertex data; `samples` must then be non-NULL as well.
+
+ @param tree The FEM tree the solution lives on.
+ @param solution Solved coefficients.
+ @param isoValue Level at which the surface is extracted.
+ @param samples Point samples; only read when `sampleData` is non-NULL.
+ @param sampleData Optional per-sample data (normals plus additional data).
+ @param density Density estimator forwarded to the extractor.
+ @param SetVertex Callback that fills an output vertex from position, weight and sample data.
+ @param messageWriter Sink for the statistics messages.
+ @param comments Not written in practice: NoComments is hard-coded to true, so an empty list is used instead.
+ @param iXForm Transform forwarded to PlyWritePolygons.
+ @param Out Destination path of the PLY file.
+ */
+template<typename Vertex, unsigned int ... FEMSigs, typename ... SampleData>
+void ExtractMesh(UIntPack<FEMSigs ...>,
+                 std::tuple<SampleData ...>,
+                 FEMTree<sizeof ...(FEMSigs), float>& tree,
+                 const DenseNodeData<float, UIntPack<FEMSigs ...>>& solution,
+                 float isoValue,
+                 const std::vector<typename FEMTree<sizeof ...(FEMSigs), float>::PointSample>* samples,
+                 std::vector<MultiPointStreamData<float, PointStreamNormal<float, 3>, MultiPointStreamData<float, SampleData ...>>>* sampleData,
+                 const typename FEMTree<sizeof ...(FEMSigs), float>::template DensityEstimator<WEIGHT_DEGREE>* density,
+                 std::function<void (Vertex&, Point<float, 3>, float, MultiPointStreamData<float, PointStreamNormal<float, 3>, MultiPointStreamData<float, SampleData ...>>)> SetVertex,
+                 MessageWriter& messageWriter,
+                 std::vector<std::string> &comments,
+                 XForm<float, sizeof...(FEMSigs) + 1> iXForm,
+                 const char *Out)
+{
+    const bool ASCII = true;
+    const int DataX = 32;
+    const bool LinearFit = false;
+    const bool NoComments = true;
+    const bool NonManifold = false;
+    const bool PolygonMesh = false;
+
+    typedef UIntPack<FEMSigs ...> Sigs;
+    typedef PointStreamNormal<float, 3> NormalPointSampleData;
+    typedef MultiPointStreamData<float, SampleData ...> AdditionalPointSampleData;
+    typedef MultiPointStreamData<float, NormalPointSampleData, AdditionalPointSampleData> TotalPointSampleData;
+    static const unsigned int DataSig = FEMDegreeAndBType<DATA_DEGREE, BOUNDARY_FREE>::Signature;
+    typedef typename FEMTree<3, float>::template DensityEstimator<WEIGHT_DEGREE> DensityEstimator;
+
+    // Build the "<temp directory>/PR_" prefix with bounded writes only
+    char tempPath[1024];
+    tempPath[0] = 0;
+    SetTempDirectory(tempPath, sizeof(tempPath));
+
+    if (strlen(tempPath) == 0) snprintf(tempPath, sizeof(tempPath), ".%c", FileSeparator);
+
+    // Large enough for the whole path plus a separator, "PR_" and the terminator
+    char tempHeader[sizeof(tempPath) + 8];
+    if (tempPath[strlen(tempPath) - 1] == FileSeparator) snprintf(tempHeader, sizeof(tempHeader), "%sPR_", tempPath);
+    else                                                 snprintf(tempHeader, sizeof(tempHeader), "%s%cPR_", tempPath, FileSeparator);
+
+    CoredMeshData<Vertex> *mesh = new CoredFileMeshData<Vertex>(tempHeader);
+
+    typename IsoSurfaceExtractor<3, float, Vertex>::IsoStats isoStats;
+
+    if (sampleData)
+    {
+        SparseNodeData<ProjectiveData<TotalPointSampleData, float>, IsotropicUIntPack<3, DataSig>> _sampleData = tree.template setDataField<DataSig, false>(*samples, *sampleData, (DensityEstimator*)NULL);
+        // Re-weight the splatted data by DataX^depth at every node that carries data
+        for (const RegularTreeNode<3, FEMTreeNodeData>* n = tree.tree().nextNode(); n; n = tree.tree().nextNode(n))
+        {
+            ProjectiveData<TotalPointSampleData, float>* clr = _sampleData(n);
+            if (clr) { (*clr) *= (float)pow(DataX, tree.depth(n)); }
+        }
+
+        isoStats = IsoSurfaceExtractor<3, float, Vertex>::template Extract<TotalPointSampleData>(Sigs(), UIntPack<WEIGHT_DEGREE>(), UIntPack<DataSig>(), tree, density, &_sampleData, solution, isoValue, *mesh, SetVertex, !LinearFit, !NonManifold, PolygonMesh, false);
+    }
+#if defined(__GNUC__) && __GNUC__ < 5
+    // Older GCC needs the NULL data pointer spelled out with its full type
+    else isoStats = IsoSurfaceExtractor<3, float, Vertex>::template Extract<TotalPointSampleData>(Sigs(), UIntPack<WEIGHT_DEGREE>(), UIntPack<DataSig>(), tree, density, (SparseNodeData<ProjectiveData<TotalPointSampleData, float>, IsotropicUIntPack<3, DataSig>> *)NULL, solution, isoValue, *mesh, SetVertex, !LinearFit, !NonManifold, PolygonMesh, false);
+#else // !__GNUC__ || __GNUC__ >= 5
+    else isoStats = IsoSurfaceExtractor<3, float, Vertex>::template Extract<TotalPointSampleData>(Sigs(), UIntPack<WEIGHT_DEGREE>(), UIntPack<DataSig>(), tree, density, NULL, solution, isoValue, *mesh, SetVertex, !LinearFit, !NonManifold, PolygonMesh, false);
+#endif // defined(__GNUC__) && __GNUC__ < 5
+    // Cast so the arguments match the %d conversions (inCorePoints.size() is a size_t)
+    messageWriter("Vertices / Polygons: %d / %d\n", (int)(mesh->outOfCorePointCount() + mesh->inCorePoints.size()), (int)mesh->polygonCount());
+    std::string isoStatsString = isoStats.toString() + std::string("\n");
+    messageWriter(isoStatsString.c_str());
+
+    std::vector<std::string> noComments;
+    if (!PlyWritePolygons<Vertex, float, 3>(Out, mesh, ASCII ? PLY_ASCII : PLY_BINARY_NATIVE, NoComments ? noComments : comments, iXForm)) {
+        // NOTE(review): if ERROR_OUT does not return, `mesh` is not released on this path -- confirm
+        ERROR_OUT("Could not write mesh to: %s", Out);
+    }
+
+    delete mesh;
+}
+
+/**
+ Runs screened Poisson surface reconstruction on the point cloud stored at `In` and writes the mesh to `Out`.
+
+ Progress is reported through `progressHandler` at fixed checkpoints: 0 before any work, 0.2 after the
+ samples are read, 0.3 after the FEM constraints are added, 0.4 after the interpolation constraints,
+ 0.5 after the linear solve, 0.6 after the iso-value is computed and 1 after the mesh is written.
+ Whenever the handler returns false (except at the final call) the function releases everything it
+ has allocated and returns without writing a mesh.
+
+ Per the original note, the intended instantiation is SampleData = PointStreamColor<float> with
+ FEMSigs = IsotropicUIntPack<3, FEMDegreeAndBType<1, BOUNDARY_NEUMANN>::Signature> -- confirm against the caller.
+
+ @param In Path of the input PLY point cloud.
+ @param Out Path of the output PLY mesh.
+ @param params Reconstruction settings.
+ @param progressHandler Receives progress in 0-1; return false to cancel.
+ */
+template<typename ... SampleData, unsigned int ... FEMSigs>
+void _PoissonReconExecute(const char *In,
+                          const char *Out,
+                          PoissonReconParameters params,
+                          std::function<bool (float)> progressHandler,
+                          UIntPack<FEMSigs ...>)
+{
+    typedef UIntPack<FEMSigs ...> Sigs;
+    typedef UIntPack<FEMSignature<FEMSigs>::Degree ...> Degrees;
+    typedef UIntPack<FEMDegreeAndBType<NORMAL_DEGREE, DerivativeBoundary<FEMSignature<FEMSigs>::BType, 1>::BType>::Signature ...> NormalSigs;
+    typedef typename FEMTree<3, float>::template DensityEstimator<WEIGHT_DEGREE> DensityEstimator;
+    typedef typename FEMTree<3, float>::template InterpolationInfo<float, 0> InterpolationInfo;
+    typedef PointStreamNormal<float, 3> NormalPointSampleData;
+    typedef MultiPointStreamData<float, SampleData ...> AdditionalPointSampleData;
+    typedef MultiPointStreamData<float, NormalPointSampleData, AdditionalPointSampleData> TotalPointSampleData;
+    typedef InputPointStreamWithData<float, 3, TotalPointSampleData> InputPointStream;
+    typedef TransformedInputPointStreamWithData<float, 3, TotalPointSampleData> XInputPointStream;
+    MessageWriter messageWriter;
+    messageWriter.echoSTDOUT = false;
+    std::vector<std::string> comments;
+
+    messageWriter(comments, "*************************************************************\n");
+    messageWriter(comments, "** Running Screened Poisson Reconstruction (Version %s) **\n", VERSION);
+    messageWriter(comments, "*************************************************************\n");
+
+    XForm<float, 3 + 1> xForm = XForm<float, 3 + 1>::Identity();
+
+    float isoValue = 0;
+
+    // Nothing has been allocated yet, so a plain return is enough here
+    if (progressHandler(0) == false) { return; }
+
+    FEMTree<3, float> tree(MEMORY_ALLOCATOR_BLOCK_SIZE);
+
+    int pointCount;
+
+    float pointWeightSum;
+    std::vector<typename FEMTree<3, float>::PointSample>* samples = new std::vector<typename FEMTree<3, float>::PointSample>();
+    std::vector<TotalPointSampleData>* sampleData = NULL;
+    DensityEstimator* density = NULL;
+    SparseNodeData<Point<float, 3>, NormalSigs>* normalInfo = NULL;
+    float targetValue = (float)0.5;
+
+    // Releases every heap object owned at function scope; safe to call on any exit path
+    // because deleting a NULL pointer is a no-op.
+    auto releaseOwnedData = [&]() {
+        delete samples, samples = NULL;
+        delete sampleData, sampleData = NULL;
+        delete density, density = NULL;
+        delete normalInfo, normalInfo = NULL;
+    };
+
+    // Read in the samples (and color data)
+    {
+        sampleData = new std::vector<TotalPointSampleData>();
+        std::vector<std::pair<Point<float, 3>, TotalPointSampleData>> inCorePoints;
+        InputPointStream* pointStream = new PLYInputPointStreamWithData<float, 3, TotalPointSampleData>
+        (
+            In,
+            TotalPointSampleData::PlyReadProperties(),
+            TotalPointSampleData::PlyReadNum,
+            TotalPointSampleData::ValidPlyReadProperties
+        );
+
+        // First pass: measure the bounding box to derive the normalising transform
+        typename TotalPointSampleData::Transform _xForm(xForm);
+        XInputPointStream _pointStream([&](Point<float, 3>& p, TotalPointSampleData& d){ p = xForm * p, d = _xForm(d); }, *pointStream);
+        xForm = params.Scale > 0 ? GetPointXForm(_pointStream, (float)params.Scale) * xForm : xForm;
+        {
+            // Second pass: insert the normalised samples into the tree
+            typename TotalPointSampleData::Transform _xForm(xForm);
+            XInputPointStream _pointStream([&](Point<float, 3>& p, TotalPointSampleData& d){ p = xForm*p, d = _xForm(d); }, *pointStream);
+            auto ProcessData = [](const Point<float, 3>& p, TotalPointSampleData& d)
+            {
+                float l = (float)Length(std::get<0>(d.data).data);
+
+                // Reject zero-length and NaN normals
+                if (!l || l != l) { return -1.0f; }
+
+                std::get<0>(d.data).data /= l;
+
+                return 1.0f;
+            };
+
+            pointCount = FEMTreeInitializer<3, float>::template Initialize<TotalPointSampleData>(tree.spaceRoot(), _pointStream, params.Depth, *samples, *sampleData, true, tree.nodeAllocator, tree.initializer(), ProcessData);
+        }
+
+        delete pointStream;
+
+        messageWriter("Input Points / Samples: %d / %d\n", pointCount, (int)samples->size());
+    }
+
+    if (progressHandler(0.2) == false) { releaseOwnedData(); return; }
+
+    int kernelDepth = params.Depth - 2;
+
+    DenseNodeData<float, Sigs> solution;
+    {
+        DenseNodeData<float, Sigs> constraints;
+        InterpolationInfo* iInfo = NULL;
+        int solveDepth = params.Depth;
+
+        tree.resetNodeIndices();
+
+        // Get the kernel density estimator
+        {
+            density = tree.template setDensityEstimator<WEIGHT_DEGREE>(*samples, kernelDepth, params.SamplesPerNode, 1);
+        }
+
+        // Transform the Hermite samples into a vector field
+        {
+            normalInfo = new SparseNodeData<Point<float, 3>, NormalSigs>();
+            *normalInfo = tree.setNormalField(NormalSigs(), *samples, *sampleData, density, pointWeightSum);
+
+            #pragma omp parallel for
+            for (int i = 0; i < normalInfo->size(); i++)
+            {
+                (*normalInfo)[i] *= -1.0f;
+            }
+
+            messageWriter("Point weight / Estimated Area: %g / %g\n", pointWeightSum, pointCount * pointWeightSum);
+        }
+
+        // Trim the tree and prepare for multigrid
+        {
+            constexpr int MAX_DEGREE = NORMAL_DEGREE> Degrees::Max() ? NORMAL_DEGREE : Degrees::Max();
+            tree.template finalizeForMultigrid<MAX_DEGREE>(params.FullDepth, typename FEMTree<3, float>::template HasNormalDataFunctor<NormalSigs>(*normalInfo), normalInfo, density);
+        }
+
+        // Add the FEM constraints
+        {
+            constraints = tree.initDenseNodeData(Sigs());
+            typename FEMIntegrator::template Constraint<Sigs, IsotropicUIntPack<3, 1>, NormalSigs, IsotropicUIntPack<3, 0>, 3> F;
+            unsigned int derivatives2[3];
+            for (int d = 0; d < 3; d++) { derivatives2[d] = 0; }
+
+            typedef IsotropicUIntPack<3, 1> Derivatives1;
+            typedef IsotropicUIntPack<3, 0> Derivatives2;
+
+            for (int d = 0; d < 3; d++)
+            {
+                unsigned int derivatives1[3];
+                for (int dd = 0; dd < 3; dd++) { derivatives1[dd] = dd == d ? 1 : 0; }
+
+                F.weights[d][TensorDerivatives<Derivatives1>::Index(derivatives1)][TensorDerivatives<Derivatives2>::Index(derivatives2)] = 1;
+            }
+
+            tree.addFEMConstraints(F, *normalInfo, constraints, solveDepth);
+        }
+
+        // Free up the normal info
+        delete normalInfo, normalInfo = NULL;
+
+        if (progressHandler(0.3) == false) { releaseOwnedData(); return; }
+
+        // Add the interpolation constraints
+        if (params.PointWeight > 0)
+        {
+            iInfo = FEMTree<3, float>::template InitializeApproximatePointInterpolationInfo<float, 0>(
+                                                                                                      tree,
+                                                                                                      *samples,
+                                                                                                      ConstraintDual(targetValue, (float)params.PointWeight * pointWeightSum),
+                                                                                                      SystemDual((float)params.PointWeight * pointWeightSum),
+                                                                                                      true,
+                                                                                                      1);
+            tree.addInterpolationConstraints(constraints, solveDepth, *iInfo);
+        }
+
+        messageWriter("Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n", (int)tree.leaves(), (int)tree.nodes(), (int)tree.ghostNodes());
+
+        // iInfo lives in this scope only, so it is released here before the shared cleanup
+        if (progressHandler(0.4) == false) { delete iInfo, iInfo = NULL; releaseOwnedData(); return; }
+
+        // Solve the linear system
+        {
+            typename FEMTree<3, float>::SolverInfo sInfo;
+            sInfo.cgDepth = 0;
+            sInfo.cascadic = true;
+            sInfo.vCycles = 1;
+            sInfo.iters = params.Iters;
+            sInfo.cgAccuracy = params.CGSolverAccuracy;
+            sInfo.verbose = false;
+            sInfo.showResidual = false;
+            sInfo.showGlobalResidual = SHOW_GLOBAL_RESIDUAL_NONE;
+            sInfo.sliceBlockSize = 1;
+            sInfo.baseDepth = params.BaseDepth;
+            sInfo.baseVCycles = params.BaseVCycles;
+
+            typename FEMIntegrator::template System<Sigs, IsotropicUIntPack<3, 1>> F({ 0.0, 1.0 });
+            solution = tree.solveSystem(Sigs(), F, constraints, solveDepth, sInfo, iInfo);
+            if (iInfo) delete iInfo, iInfo = NULL;
+        }
+
+        if (progressHandler(0.5) == false) { releaseOwnedData(); return; }
+    }
+
+    // The iso-value is the weighted average of the solution evaluated at the sample positions
+    {
+        double valueSum = 0, weightSum = 0;
+        typename FEMTree<3, float>::template MultiThreadedEvaluator<Sigs, 0> evaluator(&tree, solution);
+
+        #pragma omp parallel for reduction(+ : valueSum, weightSum)
+        for (int j = 0; j < samples->size(); j++)
+        {
+            ProjectiveData<Point<float, 3>, float>& sample = (*samples)[j].sample;
+            float w = sample.weight;
+            if (w > 0) {
+                weightSum += w;
+                valueSum += evaluator.values(sample.data / sample.weight, omp_get_thread_num(), (*samples)[j].node)[0] * w;
+            }
+        }
+
+        isoValue = (float)(valueSum / weightSum);
+        messageWriter("Iso-Value: %e = %g / %g\n", isoValue, valueSum, weightSum);
+    }
+
+    if (progressHandler(0.6) == false) { releaseOwnedData(); return; }
+
+    typedef PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamNormal<float, 3>, PointStreamValue<float>, AdditionalPointSampleData>> Vertex;
+    // Output vertices carry the normal, the density weight and the additional sample data
+    std::function<void (Vertex&, Point<float, 3>, float, TotalPointSampleData)> SetVertex = [](Vertex& v, Point<float, 3> p, float w, TotalPointSampleData d) {
+        v.point = p;
+        std::get<0>(v.data.data) = std::get<0>(d.data);
+        std::get<1>(v.data.data).data = w;
+        std::get<2>(v.data.data) = std::get<1>(d.data);
+    };
+
+    ExtractMesh<Vertex>(UIntPack<FEMSigs ...>(),
+                        std::tuple<SampleData ...>(),
+                        tree,
+                        solution,
+                        isoValue,
+                        samples,
+                        sampleData,
+                        density,
+                        SetVertex,
+                        messageWriter,
+                        comments,
+                        xForm.inverse(),
+                        Out);
+
+    // Also frees `samples`, which was previously never deleted
+    releaseOwnedData();
+
+    progressHandler(1);
+}
diff --git a/PoissonRecon-Xcode/Meshing/SurfaceTrimmerExecute.hpp b/PoissonRecon-Xcode/Meshing/SurfaceTrimmerExecute.hpp
new file mode 100644
index 0000000..606568b
--- /dev/null
+++ b/PoissonRecon-Xcode/Meshing/SurfaceTrimmerExecute.hpp
@@ -0,0 +1,490 @@
+//
+//  SurfaceTrimmerExecute.hpp
+//  PoissonRecon
+//
+//  Created by Aaron Thompson on 2/12/19.
+//  Copyright © 2019 Standard Cyborg. All rights reserved.
+//
+
+#ifndef SurfaceTrimmerExecute_hpp
+#define SurfaceTrimmerExecute_hpp
+
+#include "MyMiscellany.h"
+#include "FEMTree.h"
+#include "Ply.h"
+#include "PointStreamData.h"
+#include <iostream>
+
+/**
+ Command-line parameters:
+ {
+ "In": "/path/to/input.ply",
+ "Out": "/path/to/output.ply",
+ "Smooth": 5,
+ "Trim": 5,
+ "IslandAreaRatio": 0.001,
+ "PolygonMesh": false,
+ "Verbose": false
+ }
+ */
+
+/**
+ Packs an undirected edge into one 64-bit key: the smaller index goes in the high
+ 32 bits and the larger index in the low 32 bits, so EdgeKey(a, b) == EdgeKey(b, a).
+
+ Each index is packed through its unsigned 32-bit bit pattern, so a negative index can neither
+ sign-extend into the other half of the key nor trigger a left shift of a negative value.
+ For non-negative indices the result is the same as the plain shift-and-or form.
+
+ Declared `static inline` because the function is defined in a header; a plain definition
+ would be emitted as an external symbol by every translation unit that includes it.
+ */
+static inline long long EdgeKey(int key1, int key2)
+{
+    const int smaller = key1 < key2 ? key1 : key2;
+    const int larger = key1 < key2 ? key2 : key1;
+
+    const unsigned long long highWord = (unsigned long long)(unsigned int)smaller << 32;
+    const unsigned long long lowWord = (unsigned long long)(unsigned int)larger;
+
+    return (long long)(highWord | lowWord);
+}
+
+/**
+ Returns the vertex on the segment v1-v2 at which the leading scalar value equals `value`,
+ found by linear interpolation between the two endpoint values.
+ When both endpoints carry the same value the midpoint is returned instead.
+ */
+template <typename ... VertexData>
+PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData...>>
+InterpolateVertices(const PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData...>>& v1,
+                    const PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData...>>& v2,
+                    float value)
+{
+    const auto& value1 = std::get<0>(v1.data.data).data;
+    const auto& value2 = std::get<0>(v2.data.data).data;
+
+    // Equal endpoint values leave no gradient to interpolate along
+    if (value1 == value2) {
+        return (v1 + v2) / float(2.0);
+    }
+
+    // Fraction of the way from v1 to v2 at which `value` is reached
+    float dx = (value1 - value) / (value1 - value2);
+
+    return v1 * (1.f - dx) + v2 * dx;
+}
+
+/**
+ Performs one smoothing pass over the leading scalar value of every vertex.
+
+ Each polygon edge contributes the value of either endpoint to the other endpoint.
+ A vertex's new value is the sum of its own value and all contributions,
+ divided by the number of contributions plus one.
+ */
+template <typename ... VertexData>
+void SmoothValues(std::vector<PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData...>>>& vertices,
+                  const std::vector<std::vector<int>>& polygons)
+{
+    std::vector<int> neighborCount(vertices.size());
+    std::vector<float> neighborSum(vertices.size(), 0);
+
+    for (const std::vector<int>& polygon : polygons) {
+        const int sz = int(polygon.size());
+
+        // Walk the edges (k, k + 1), wrapping around at the end
+        for (int k = 0; k < sz; k++) {
+            const int a = polygon[k];
+            const int b = polygon[(k + 1) % sz];
+
+            neighborCount[a]++;
+            neighborCount[b]++;
+            neighborSum[a] += std::get<0>(vertices[b].data.data).data;
+            neighborSum[b] += std::get<0>(vertices[a].data.data).data;
+        }
+    }
+
+    for (size_t v = 0; v < vertices.size(); v++) {
+        auto& ownValue = std::get<0>(vertices[v].data.data).data;
+        ownValue = (neighborSum[v] + ownValue) / (neighborCount[v] + 1);
+    }
+}
+
+/**
+ Splits one polygon along the iso-line where the leading vertex value equals `trimValue`.
+
+ Polygons entirely above the threshold are appended to `gtPolygons`, polygons entirely at or below
+ it to `ltPolygons`, each with a `false` flag in the matching flag list. A polygon that straddles the
+ threshold is cut into pieces: new vertices are created on the crossing edges (shared through
+ `vertexTable` so neighbouring polygons reuse them) and each piece is appended to the list for its side.
+ Every output list may be NULL, in which case that output is skipped.
+
+ Note that a finished above-threshold piece appends `true` to `ltFlags`, and a finished
+ below-threshold piece appends `true` to `gtFlags`.
+ */
+template <typename ... VertexData>
+void SplitPolygon(const std::vector<int>& polygon,
+                  std::vector<PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData...>>>& vertices,
+                  std::vector<std::vector<int>>* ltPolygons, std::vector<std::vector<int>>* gtPolygons,
+                  std::vector<bool>* ltFlags, std::vector<bool>* gtFlags,
+                  std::unordered_map<long long, int>& vertexTable,
+                  float trimValue)
+{
+    const int sz = int(polygon.size());
+
+    // Classify every corner against the threshold
+    std::vector<bool> gt(sz);
+    int gtCount = 0;
+    for (int j = 0; j < sz; j++) {
+        gt[j] = (std::get<0>(vertices[polygon[j]].data.data).data > trimValue);
+        if (gt[j]) {
+            gtCount++;
+        }
+    }
+
+    // Entirely above the threshold: keep whole
+    if (gtCount == sz) {
+        if (gtPolygons) {
+            gtPolygons->push_back(polygon);
+        }
+        if (gtFlags) {
+            gtFlags->push_back(false);
+        }
+        return;
+    }
+
+    // Entirely at or below the threshold: keep whole
+    if (gtCount == 0) {
+        if (ltPolygons) {
+            ltPolygons->push_back(polygon);
+        }
+        if (ltFlags) {
+            ltFlags->push_back(false);
+        }
+        return;
+    }
+
+    // Returns the index of the vertex where edge (v1, v2) crosses the threshold, creating it on first use
+    auto crossingVertex = [&](int v1, int v2) -> int {
+        std::unordered_map<long long, int>::iterator found = vertexTable.find(EdgeKey(v1, v2));
+        if (found != vertexTable.end()) {
+            return found->second;
+        }
+
+        const int created = int(vertices.size());
+        vertexTable[EdgeKey(v1, v2)] = created;
+        vertices.push_back(InterpolateVertices(vertices[v1], vertices[v2], trimValue));
+        return created;
+    };
+
+    // Find a corner above the threshold whose predecessor is not
+    int start;
+    for (start = 0; start < sz; start++) {
+        if (gt[start] && !gt[(start + sz - 1) % sz]) {
+            break;
+        }
+    }
+
+    bool gtFlag = true;
+    std::vector<int> poly;
+
+    // Add the initial vertex
+    {
+        const int prev = (start + int(sz) - 1) % sz;
+        poly.push_back(crossingVertex(polygon[prev], polygon[start]));
+    }
+
+    // Walk once around the polygon, closing a piece at every threshold crossing
+    for (int step = 0; step <= sz; step++) {
+        const int j1 = (step + start + sz - 1) % sz;
+        const int j2 = (step + start) % sz;
+        const int v1 = polygon[j1];
+        const int v2 = polygon[j2];
+
+        if (gt[j2] == gtFlag) {
+            poly.push_back(v2);
+            continue;
+        }
+
+        const int vIdx = crossingVertex(v1, v2);
+        poly.push_back(vIdx);
+
+        if (gtFlag) {
+            if (gtPolygons) {
+                gtPolygons->push_back(poly);
+            }
+            if (ltFlags) {
+                ltFlags->push_back(true);
+            }
+        } else {
+            if (ltPolygons) {
+                ltPolygons->push_back(poly);
+            }
+            if (gtFlags) {
+                gtFlags->push_back(true);
+            }
+        }
+
+        // The next piece starts at the crossing and continues with the current corner
+        poly.clear();
+        poly.push_back(vIdx);
+        poly.push_back(v2);
+        gtFlag = !gtFlag;
+    }
+}
+
+/**
+ Converts a polygon list into a triangle list.
+
+ Polygons with more than three vertices are split with MinimalAreaTriangulation, triangles are
+ copied unchanged, and polygons with fewer than three vertices are dropped.
+
+ The size test looks at the current polygon's vertex count. It previously compared the number of
+ polygons in the mesh (`polygons.size()`), which sent every polygon of any mesh with more than three
+ polygons through MinimalAreaTriangulation and dropped larger polygons from meshes with at most three.
+
+ @param vertices Vertex list the polygon indices refer to; only `.point` is read.
+ @param polygons Input polygons as lists of vertex indices.
+ @param triangles Output; cleared first, then filled with index triples.
+ */
+template <class Vertex>
+void Triangulate(const std::vector<Vertex>& vertices, const std::vector<std::vector<int>>& polygons, std::vector<std::vector<int>>& triangles)
+{
+    triangles.clear();
+
+    for (size_t i = 0; i < polygons.size(); i++) {
+        const std::vector<int>& polygon = polygons[i];
+
+        if (polygon.size() > 3) {
+            // Gather the polygon's corner positions for the triangulation routine
+            std::vector<Point<float, 3>> _vertices(polygon.size());
+            for (int j = 0; j < int(polygon.size()); j++) {
+                _vertices[j] = vertices[polygon[j]].point;
+            }
+
+            std::vector<TriangleIndex> _triangles = MinimalAreaTriangulation<float, 3>((ConstPointer(Point<float, 3>))GetPointer(_vertices), _vertices.size());
+
+            // Add the triangles to the mesh, translating local corner indices back to vertex indices
+            size_t idx = triangles.size();
+            triangles.resize(idx + _triangles.size());
+
+            for (int j = 0; j < int(_triangles.size()); j++) {
+                triangles[idx + j].resize(3);
+                for (int k = 0; k < 3; k++) {
+                    triangles[idx + j][k] = polygon[_triangles[j].idx[k]];
+                }
+            }
+        }
+        else if (polygon.size() == 3) {
+            triangles.push_back(polygon);
+        }
+    }
+}
+
+/**
+ Returns the area of a polygon given as a list of vertex indices.
+
+ Fewer than three vertices give 0, a triangle is measured directly, and a larger polygon is
+ measured as the sum of the triangles formed by its vertex average and each of its edges.
+ */
+template <class Vertex>
+double PolygonArea(const std::vector<Vertex>& vertices, const std::vector<int>& polygon)
+{
+    const size_t cornerCount = polygon.size();
+
+    if (cornerCount < 3) {
+        return 0.0;
+    }
+
+    if (cornerCount == 3) {
+        return Area(vertices[polygon[0]].point, vertices[polygon[1]].point, vertices[polygon[2]].point);
+    }
+
+    // Average of the corner positions
+    Point<float, 3> center;
+    for (size_t k = 0; k < cornerCount; k++) {
+        center += vertices[polygon[k]].point;
+    }
+    center /= float(cornerCount);
+
+    // Fan of triangles from the average point to every edge
+    double total = 0;
+    for (size_t k = 0; k < cornerCount; k++) {
+        const size_t next = (k + 1) % cornerCount;
+        total += Area(center, vertices[polygon[k]].point, vertices[polygon[next]].point);
+    }
+
+    return total;
+}
+
+// Removes every vertex that no polygon references and renumbers the polygon indices to match;
+// surviving vertices keep their relative order. Polygon indices are not range-checked.
+template <class Vertex>
+void RemoveHangingVertices(std::vector<Vertex>& vertices, std::vector<std::vector<int>>& polygons)
+{
+    std::vector<bool> used(vertices.size(), false);
+    for (size_t i = 0; i < polygons.size(); i++) {
+        for (size_t j = 0; j < polygons[i].size(); j++) {
+            used[polygons[i][j]] = true;
+        }
+    }
+    // Old indices are dense, so a flat table (-1 = removed) replaces the original hash map.
+    std::vector<int> remap(vertices.size(), -1);
+    int kept = 0;
+    for (size_t i = 0; i < vertices.size(); i++) {
+        if (used[i]) {
+            remap[i] = kept++;
+        }
+    }
+    for (size_t i = 0; i < polygons.size(); i++) {
+        for (size_t j = 0; j < polygons[i].size(); j++) {
+            polygons[i][j] = remap[polygons[i][j]];
+        }
+    }
+
+    std::vector<Vertex> compacted(kept);
+    for (size_t i = 0; i < vertices.size(); i++) {
+        if (used[i]) {
+            compacted[remap[i]] = vertices[i];
+        }
+    }
+    // Swap rather than assign: avoids copying the compacted array a second time.
+    vertices.swap(compacted);
+}
+
+// Groups polygons into connected components; two polygons are joined when they produce the same
+// EdgeKey for a pair of consecutive vertices. On return, components[c] holds the indices of the
+// polygons in component c in increasing order; previous contents of `components` are discarded.
+// Marked inline because this is a non-template function defined in a header.
+inline void SetConnectedComponents(const std::vector<std::vector<int>>& polygons,
+                                   std::vector<std::vector<int>>& components)
+{
+    // polygonRoots[p] is p's parent in a disjoint-set forest; a root is its own parent.
+    std::vector<int> polygonRoots(polygons.size());
+    for (size_t i = 0; i < polygons.size(); i++) {
+        polygonRoots[i] = int(i);
+    }
+    // Maps each edge key to the first polygon that produced it.
+    std::unordered_map<long long, int> edgeTable;
+    for (size_t i = 0; i < polygons.size(); i++) {
+        int sz = int(polygons[i].size());
+        for (int j = 0; j < sz; j++) {
+            int v1 = polygons[i][j], v2 = polygons[i][(j + 1) % sz];
+            long long eKey = EdgeKey(v1, v2);
+            std::unordered_map<long long, int>::iterator iter = edgeTable.find(eKey);
+            if (iter == edgeTable.end()) {
+                edgeTable[eKey] = int(i);
+            } else {
+                // Hang every node on the path from the edge's first owner up to its root under polygon i.
+                int p = iter->second;
+                while (polygonRoots[p] != p) {
+                    int temp = polygonRoots[p];
+                    polygonRoots[p] = int(i);
+                    p = temp;
+                }
+                polygonRoots[p] = int(i);
+            }
+        }
+    }
+    // Flatten the forest so that every polygon points directly at its root.
+    for (size_t i = 0; i < polygonRoots.size(); i++) {
+        int root = int(i);
+        while (polygonRoots[root] != root) {
+            root = polygonRoots[root];
+        }
+        int p = int(i);
+        while (polygonRoots[p] != p) {
+            int temp = polygonRoots[p];
+            polygonRoots[p] = root;
+            p = temp;
+        }
+    }
+    // Number the roots in increasing order, then bucket each polygon under its root's number.
+    int cCount = 0;
+    std::vector<int> componentOf(polygonRoots.size(), -1);
+    for (int i = 0; i < int(polygonRoots.size()); i++) {
+        if (polygonRoots[i] == i) {
+            componentOf[i] = cCount++;
+        }
+    }
+    // assign() discards any previous contents; the original resize() kept stale entries.
+    components.assign(cCount, std::vector<int>());
+    for (int i = 0; i < int(polygonRoots.size()); i++) {
+        components[componentOf[polygonRoots[i]]].push_back(i);
+    }
+}
+
+// Trims the mesh stored in the PLY file `In` and writes the result to `Out`.
+//
+// Stages: read the mesh, run params.Smooth passes of SmoothValues, pass every polygon through
+// SplitPolygon with params.Trim, optionally move small connected components to the opposite
+// list (params.IslandAreaRatio > 0), triangulate unless params.PolygonMesh is set, drop
+// unreferenced vertices and write the gt polygon list.
+//
+// progressHandler receives a fraction between 0 and 1 after each stage; when it returns false
+// the function stops and returns -1. It also returns -1 when the header cannot be read or the
+// value check fails, and EXIT_SUCCESS otherwise. A failed write is reported through ERROR_OUT.
+template <typename ... VertexData>
+int _SurfaceTrimmerExecute(const char *In,
+                           const char *Out,
+                           SurfaceTrimmerParameters params,
+                           std::function<bool (float)> progressHandler)
+{
+    typedef MultiPointStreamData<float, PointStreamValue<float>, PointStreamNormal<float, 3>, PointStreamColor<float>> PLYCheckingVertexData;
+    typedef PlyVertexWithData<float, 3, PLYCheckingVertexData> PLYCheckingVertex;
+    bool readFlags[PLYCheckingVertex::PlyReadNum];
+
+    if (!PlyReadHeader((char *)In, PLYCheckingVertex::PlyReadProperties(), PLYCheckingVertex::PlyReadNum, readFlags)) {
+        std::cerr << "Failed to read ply header: " << In << std::endl;
+        return -1;
+    }
+
+    if (!PLYCheckingVertexData::ValidPlyReadProperties<0>(readFlags + 3)) {
+        std::cerr << "Ply file does not contain values" << std::endl;
+        return -1;
+    }
+
+    if (!progressHandler(0)) { return -1; }
+
+    MessageWriter messageWriter;
+    messageWriter.echoSTDOUT = false;
+
+    typedef PlyVertexWithData<float, 3, MultiPointStreamData<float, PointStreamValue<float>, VertexData ...>> Vertex;
+    std::vector<Vertex> vertices;
+    std::vector<std::vector<int>> polygons;
+    int ft;
+    std::vector<std::string> comments;
+    PlyReadPolygons<Vertex>(In, vertices, polygons, Vertex::PlyReadProperties(), Vertex::PlyReadNum, ft, comments);
+
+    if (!progressHandler(0.1f)) { return -1; }
+
+    for (int i = 0; i < params.Smooth; i++) {
+        SmoothValues(vertices, polygons);
+    }
+
+    if (!progressHandler(0.2f)) { return -1; }
+
+    // The original also scanned for the minimum and maximum vertex value here, starting from
+    // vertices[0] even for an empty mesh; the results were never used, so the scan was removed.
+    std::unordered_map<long long, int> vertexTable;
+    std::vector<std::vector<int>> ltPolygons, gtPolygons;
+    std::vector<bool> ltFlags, gtFlags;
+
+    messageWriter(comments, "*********************************************\n");
+    messageWriter(comments, "** Running Surface Trimmer **\n");
+    messageWriter(comments, "*********************************************\n");
+
+    for (size_t i = 0; i < polygons.size(); i++) {
+        SplitPolygon(polygons[i], vertices, &ltPolygons, &gtPolygons, &ltFlags, &gtFlags, vertexTable, params.Trim);
+    }
+
+    if (!progressHandler(0.3f)) { return -1; }
+
+    if (params.IslandAreaRatio > 0) {
+        std::vector<std::vector<int>> _ltPolygons, _gtPolygons;
+        std::vector<std::vector<int>> ltComponents, gtComponents;
+        SetConnectedComponents(ltPolygons, ltComponents);
+        SetConnectedComponents(gtPolygons, gtComponents);
+        std::vector<double> ltAreas(ltComponents.size(), 0.0), gtAreas(gtComponents.size(), 0.0);
+        std::vector<bool> ltComponentFlags(ltComponents.size(), false), gtComponentFlags(gtComponents.size(), false);
+        double area = 0.0;
+
+        // Per component: total area, and whether any member polygon carries a set flag.
+        for (size_t i = 0; i < ltComponents.size(); i++) {
+            for (size_t j = 0; j < ltComponents[i].size(); j++) {
+                ltAreas[i] += PolygonArea<Vertex>(vertices, ltPolygons[ltComponents[i][j]]);
+                ltComponentFlags[i] = (ltComponentFlags[i] || ltFlags[ltComponents[i][j]]);
+            }
+            area += ltAreas[i];
+        }
+        for (size_t i = 0; i < gtComponents.size(); i++) {
+            for (size_t j = 0; j < gtComponents[i].size(); j++) {
+                gtAreas[i] += PolygonArea<Vertex>(vertices, gtPolygons[gtComponents[i][j]]);
+                gtComponentFlags[i] = (gtComponentFlags[i] || gtFlags[gtComponents[i][j]]);
+            }
+            area += gtAreas[i];
+        }
+
+        // A flagged component smaller than IslandAreaRatio of the total area switches lists.
+        for (size_t i = 0; i < ltComponents.size(); i++) {
+            if (ltAreas[i] < area * params.IslandAreaRatio && ltComponentFlags[i]) {
+                for (size_t j = 0; j < ltComponents[i].size(); j++) {
+                    _gtPolygons.push_back(ltPolygons[ltComponents[i][j]]);
+                }
+            }
+            else {
+                for (size_t j = 0; j < ltComponents[i].size(); j++) {
+                    _ltPolygons.push_back(ltPolygons[ltComponents[i][j]]);
+                }
+            }
+        }
+        for (size_t i = 0; i < gtComponents.size(); i++) {
+            if (gtAreas[i] < area * params.IslandAreaRatio && gtComponentFlags[i]) {
+                for (size_t j = 0; j < gtComponents[i].size(); j++) {
+                    _ltPolygons.push_back(gtPolygons[gtComponents[i][j]]);
+                }
+            }
+            else {
+                for (size_t j = 0; j < gtComponents[i].size(); j++) {
+                    _gtPolygons.push_back(gtPolygons[gtComponents[i][j]]);
+                }
+            }
+        }
+
+        // Swapping hands the rebuilt lists over without copying them.
+        ltPolygons.swap(_ltPolygons);
+        gtPolygons.swap(_gtPolygons);
+    }
+
+    if (!progressHandler(0.6f)) { return -1; }
+
+    if (!params.PolygonMesh) {
+        // Only gtPolygons is written below, so the discarded lt list is no longer triangulated.
+        std::vector<std::vector<int>> triangles;
+        Triangulate<Vertex>(vertices, gtPolygons, triangles);
+        gtPolygons.swap(triangles);
+    }
+
+    if (!progressHandler(0.8f)) { return -1; }
+
+    RemoveHangingVertices(vertices, gtPolygons);
+
+    if (!progressHandler(0.9f)) { return -1; }
+
+    if (!PlyWritePolygons<Vertex>(Out, vertices, gtPolygons, Vertex::PlyWriteProperties(), Vertex::PlyWriteNum, ft, comments)) {
+        ERROR_OUT("Could not write mesh to: %s", Out);
+    }
+
+    progressHandler(1);
+
+    return EXIT_SUCCESS;
+}
+
+#endif /* SurfaceTrimmerExecute_hpp */
diff --git a/PoissonRecon-Xcode/MeshingTests/Info.plist b/PoissonRecon-Xcode/MeshingTests/Info.plist
new file mode 100644
index 0000000..6c40a6c
--- /dev/null
+++ b/PoissonRecon-Xcode/MeshingTests/Info.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>BNDL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+</dict>
+</plist>
diff --git a/PoissonRecon-Xcode/MeshingTests/MeshingTests.m b/PoissonRecon-Xcode/MeshingTests/MeshingTests.m
new file mode 100644
index 0000000..5efe2f5
--- /dev/null
+++ b/PoissonRecon-Xcode/MeshingTests/MeshingTests.m
@@ -0,0 +1,36 @@
+//
+//  MeshingTests.m
+//  MeshingTests
+//
+//  Created by Aaron Thompson on 4/26/18.
+//  Copyright © 2018 Standard Cyborg. All rights reserved.
+//
+
+#import <XCTest/XCTest.h>
+#import <Meshing/Meshing.h>
+
+@interface MeshingTests : XCTestCase
+
+@end
+
+@implementation MeshingTests
+// Runs a MeshingOperation on the bundled app_scan.ply and checks that an output file appears.
+- (void)testMeshingOperation {
+    // https://stackoverflow.com/questions/26811170/how-to-create-a-single-shared-framework-between-ios-and-os-x
+    NSString *inputPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"app_scan" ofType:@"ply"];
+    NSString *outputPath = [NSTemporaryDirectory() stringByAppendingPathComponent:@"poisson_reconstructed_trimmed.ply"];
+    XCTAssertNotNil(inputPath, @"app_scan.ply is missing from the test bundle");
+    // Delete output left by an earlier run so the existence check below cannot pass vacuously.
+    [[NSFileManager defaultManager] removeItemAtPath:outputPath error:nil];
+    NSLog(@"Running Poisson on file %@", inputPath);
+    MeshingOperation *operation = [[MeshingOperation alloc] initWithInputFilePath:inputPath outputFilePath:outputPath];
+    operation.resolution = 5;
+    operation.smoothness = 1;
+    operation.surfaceTrimmingAmount = 7;
+    operation.closed = YES;
+    [operation start];
+    NSLog(@"Finished with output at %@", outputPath);
+    XCTAssertTrue([[NSFileManager defaultManager] fileExistsAtPath:outputPath]);
+}
+
+@end
diff --git a/PoissonRecon-Xcode/MeshingTests/app_scan.jpg b/PoissonRecon-Xcode/MeshingTests/app_scan.jpg
new file mode 100755
index 0000000..e443b17
Binary files /dev/null and b/PoissonRecon-Xcode/MeshingTests/app_scan.jpg differ
diff --git a/PoissonRecon-Xcode/MeshingTests/app_scan.ply b/PoissonRecon-Xcode/MeshingTests/app_scan.ply
new file mode 100644
index 0000000..31870d4
Binary files /dev/null and b/PoissonRecon-Xcode/MeshingTests/app_scan.ply differ
diff --git a/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.pbxproj b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.pbxproj
new file mode 100644
index 0000000..7f63a32
--- /dev/null
+++ b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.pbxproj
@@ -0,0 +1,1211 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		7619BF52220E243D003B2F5C /* PlyFile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7619BF1E220E243A003B2F5C /* PlyFile.cpp */; };
+		7619BF53220E243D003B2F5C /* CmdLineParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF1F220E243A003B2F5C /* CmdLineParser.h */; };
+		7619BF54220E243D003B2F5C /* Image.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF20220E243B003B2F5C /* Image.h */; };
+		7619BF5A220E243D003B2F5C /* FEMTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF26220E243B003B2F5C /* FEMTree.h */; };
+		7619BF5B220E243D003B2F5C /* PlyFile.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF27220E243B003B2F5C /* PlyFile.h */; };
+		7619BF5E220E243D003B2F5C /* FunctionData.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF2A220E243B003B2F5C /* FunctionData.h */; };
+		7619BF5F220E243D003B2F5C /* MAT.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF2B220E243B003B2F5C /* MAT.h */; };
+		7619BF60220E243D003B2F5C /* Window.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF2C220E243B003B2F5C /* Window.h */; };
+		7619BF62220E243D003B2F5C /* PNG.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF2E220E243B003B2F5C /* PNG.h */; };
+		7619BF63220E243D003B2F5C /* Array.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF2F220E243B003B2F5C /* Array.h */; };
+		7619BF66220E243D003B2F5C /* BinaryNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF32220E243B003B2F5C /* BinaryNode.h */; };
+		7619BF67220E243D003B2F5C /* Geometry.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF33220E243B003B2F5C /* Geometry.h */; };
+		7619BF68220E243D003B2F5C /* RegularTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF34220E243B003B2F5C /* RegularTree.h */; };
+		7619BF6E220E243D003B2F5C /* BSplineData.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF3A220E243B003B2F5C /* BSplineData.h */; };
+		7619BF6F220E243D003B2F5C /* MarchingCubes.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF3B220E243B003B2F5C /* MarchingCubes.h */; };
+		7619BF72220E243D003B2F5C /* SparseMatrix.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF3E220E243C003B2F5C /* SparseMatrix.h */; };
+		7619BF73220E243D003B2F5C /* Polynomial.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF3F220E243C003B2F5C /* Polynomial.h */; };
+		7619BF77220E243D003B2F5C /* Allocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF43220E243C003B2F5C /* Allocator.h */; };
+		7619BF78220E243D003B2F5C /* MyMiscellany.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF44220E243C003B2F5C /* MyMiscellany.h */; };
+		7619BF79220E243D003B2F5C /* PPolynomial.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF45220E243C003B2F5C /* PPolynomial.h */; };
+		7619BF7B220E243D003B2F5C /* Ply.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF47220E243C003B2F5C /* Ply.h */; };
+		7619BF7D220E243D003B2F5C /* LinearSolvers.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF49220E243C003B2F5C /* LinearSolvers.h */; };
+		7619BF7F220E243D003B2F5C /* SparseMatrixInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF4B220E243C003B2F5C /* SparseMatrixInterface.h */; };
+		7619BF80220E243D003B2F5C /* JPEG.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF4C220E243C003B2F5C /* JPEG.h */; };
+		7619BF82220E243D003B2F5C /* PointStream.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF4E220E243D003B2F5C /* PointStream.h */; };
+		7619BF83220E243D003B2F5C /* PointStreamData.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF4F220E243D003B2F5C /* PointStreamData.h */; };
+		7619BF84220E243D003B2F5C /* Factor.h in Headers */ = {isa = PBXBuildFile; fileRef = 7619BF50220E243D003B2F5C /* Factor.h */; };
+		762D5A942214ACBA0066EE74 /* ExecuteEntryFunctions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 762D5A922214ACBA0066EE74 /* ExecuteEntryFunctions.cpp */; };
+		762D5A952214ACBA0066EE74 /* ExecuteEntryFunctions.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 762D5A932214ACBA0066EE74 /* ExecuteEntryFunctions.hpp */; };
+		762D5A9A2214BCC50066EE74 /* Parameters.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 762D5A992214BCC50066EE74 /* Parameters.hpp */; };
+		762D5AD4221630910066EE74 /* MeshingOperation.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7691CB1120927E6B0029084C /* MeshingOperation.mm */; };
+		762D5AD5221630A00066EE74 /* ExecuteEntryFunctions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 762D5A922214ACBA0066EE74 /* ExecuteEntryFunctions.cpp */; };
+		762D5B27221638900066EE74 /* PlyFile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7619BF1E220E243A003B2F5C /* PlyFile.cpp */; };
+		7664E3D82213436600BC5377 /* PoissonReconExecute.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 7664E3D72213436600BC5377 /* PoissonReconExecute.hpp */; };
+		7664E3DF22138EE600BC5377 /* SurfaceTrimmerExecute.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 7664E3DD22138EE600BC5377 /* SurfaceTrimmerExecute.hpp */; };
+		76734EC1221273D9008CFD95 /* jmemmgr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E86221273D9008CFD95 /* jmemmgr.cpp */; };
+		76734EC2221273D9008CFD95 /* jdhuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E87221273D9008CFD95 /* jdhuff.cpp */; };
+		76734EC3221273D9008CFD95 /* jpeglib.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734E88221273D9008CFD95 /* jpeglib.h */; };
+		76734EC4221273D9008CFD95 /* jcapimin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E89221273D9008CFD95 /* jcapimin.cpp */; };
+		76734EC5221273D9008CFD95 /* jcprepct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E8A221273D9008CFD95 /* jcprepct.cpp */; };
+		76734EC6221273D9008CFD95 /* jidctred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E8B221273D9008CFD95 /* jidctred.cpp */; };
+		76734EC7221273D9008CFD95 /* jerror.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734E8C221273D9008CFD95 /* jerror.h */; };
+		76734EC8221273D9008CFD95 /* jidctflt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E8D221273D9008CFD95 /* jidctflt.cpp */; };
+		76734EC9221273D9008CFD95 /* jdmainct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E8E221273D9008CFD95 /* jdmainct.cpp */; };
+		76734ECA221273D9008CFD95 /* jdphuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E8F221273D9008CFD95 /* jdphuff.cpp */; };
+		76734ECB221273D9008CFD95 /* jdmaster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E90221273D9008CFD95 /* jdmaster.cpp */; };
+		76734ECC221273D9008CFD95 /* jcapistd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E91221273D9008CFD95 /* jcapistd.cpp */; };
+		76734ECD221273D9008CFD95 /* jcmainct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E92221273D9008CFD95 /* jcmainct.cpp */; };
+		76734ECE221273D9008CFD95 /* jdpostct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E93221273D9008CFD95 /* jdpostct.cpp */; };
+		76734ECF221273D9008CFD95 /* jdatadst.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E94221273D9008CFD95 /* jdatadst.cpp */; };
+		76734ED1221273D9008CFD95 /* jcomapi.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E96221273D9008CFD95 /* jcomapi.cpp */; };
+		76734ED2221273D9008CFD95 /* jmemnobs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E97221273D9008CFD95 /* jmemnobs.cpp */; };
+		76734ED3221273D9008CFD95 /* jcphuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E98221273D9008CFD95 /* jcphuff.cpp */; };
+		76734ED4221273D9008CFD95 /* jdapistd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E99221273D9008CFD95 /* jdapistd.cpp */; };
+		76734ED5221273D9008CFD95 /* jcmaster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9A221273D9008CFD95 /* jcmaster.cpp */; };
+		76734ED6221273D9008CFD95 /* jidctint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9B221273D9008CFD95 /* jidctint.cpp */; };
+		76734ED7221273D9008CFD95 /* jdapimin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9C221273D9008CFD95 /* jdapimin.cpp */; };
+		76734ED8221273D9008CFD95 /* jcparam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9D221273D9008CFD95 /* jcparam.cpp */; };
+		76734ED9221273D9008CFD95 /* jidctfst.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9E221273D9008CFD95 /* jidctfst.cpp */; };
+		76734EDA221273D9008CFD95 /* jquant2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734E9F221273D9008CFD95 /* jquant2.cpp */; };
+		76734EDB221273D9008CFD95 /* jquant1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA0221273D9008CFD95 /* jquant1.cpp */; };
+		76734EDC221273D9008CFD95 /* jccolor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA1221273D9008CFD95 /* jccolor.cpp */; };
+		76734EDD221273D9008CFD95 /* jctrans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA2221273D9008CFD95 /* jctrans.cpp */; };
+		76734EDE221273D9008CFD95 /* jdhuff.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EA3221273D9008CFD95 /* jdhuff.h */; };
+		76734EDF221273D9008CFD95 /* jmemsys.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EA4221273D9008CFD95 /* jmemsys.h */; };
+		76734EE0221273D9008CFD95 /* jutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA5221273D9008CFD95 /* jutils.cpp */; };
+		76734EE1221273D9008CFD95 /* jdatasrc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA6221273D9008CFD95 /* jdatasrc.cpp */; };
+		76734EE2221273D9008CFD95 /* jdinput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA7221273D9008CFD95 /* jdinput.cpp */; };
+		76734EE3221273D9008CFD95 /* jcdctmgr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA8221273D9008CFD95 /* jcdctmgr.cpp */; };
+		76734EE4221273D9008CFD95 /* jdmarker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EA9221273D9008CFD95 /* jdmarker.cpp */; };
+		76734EE5221273D9008CFD95 /* jchuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EAA221273D9008CFD95 /* jchuff.cpp */; };
+		76734EE6221273D9008CFD95 /* jconfig.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EAB221273D9008CFD95 /* jconfig.h */; };
+		76734EE7221273D9008CFD95 /* jdcoefct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EAC221273D9008CFD95 /* jdcoefct.cpp */; };
+		76734EE8221273D9008CFD95 /* jmorecfg.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EAD221273D9008CFD95 /* jmorecfg.h */; };
+		76734EE9221273D9008CFD95 /* jdct.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EAE221273D9008CFD95 /* jdct.h */; };
+		76734EEA221273D9008CFD95 /* jfdctflt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EAF221273D9008CFD95 /* jfdctflt.cpp */; };
+		76734EEB221273D9008CFD95 /* jversion.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EB0221273D9008CFD95 /* jversion.h */; };
+		76734EEC221273D9008CFD95 /* jcsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB1221273D9008CFD95 /* jcsample.cpp */; };
+		76734EED221273D9008CFD95 /* jinclude.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EB2221273D9008CFD95 /* jinclude.h */; };
+		76734EEE221273D9008CFD95 /* jfdctint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB3221273D9008CFD95 /* jfdctint.cpp */; };
+		76734EEF221273D9008CFD95 /* jccoefct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB4221273D9008CFD95 /* jccoefct.cpp */; };
+		76734EF0221273D9008CFD95 /* jdsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB5221273D9008CFD95 /* jdsample.cpp */; };
+		76734EF1221273D9008CFD95 /* jcinit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB6221273D9008CFD95 /* jcinit.cpp */; };
+		76734EF2221273D9008CFD95 /* jdmerge.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB7221273D9008CFD95 /* jdmerge.cpp */; };
+		76734EF3221273D9008CFD95 /* jerror.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB8221273D9008CFD95 /* jerror.cpp */; };
+		76734EF4221273D9008CFD95 /* jdcolor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EB9221273D9008CFD95 /* jdcolor.cpp */; };
+		76734EF5221273D9008CFD95 /* jdtrans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EBA221273D9008CFD95 /* jdtrans.cpp */; };
+		76734EF6221273D9008CFD95 /* jddctmgr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EBB221273D9008CFD95 /* jddctmgr.cpp */; };
+		76734EF7221273D9008CFD95 /* jfdctfst.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EBC221273D9008CFD95 /* jfdctfst.cpp */; };
+		76734EF8221273D9008CFD95 /* jchuff.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EBD221273D9008CFD95 /* jchuff.h */; };
+		76734EF9221273D9008CFD95 /* jcmarker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 76734EBE221273D9008CFD95 /* jcmarker.cpp */; };
+		76734EFA221273D9008CFD95 /* jpegint.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734EBF221273D9008CFD95 /* jpegint.h */; };
+		76734F13221273F8008CFD95 /* pngrio.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734EFD221273F8008CFD95 /* pngrio.c */; };
+		76734F14221273F8008CFD95 /* pngerror.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734EFE221273F8008CFD95 /* pngerror.c */; };
+		76734F15221273F8008CFD95 /* pngwio.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734EFF221273F8008CFD95 /* pngwio.c */; };
+		76734F16221273F8008CFD95 /* pngtrans.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F00221273F8008CFD95 /* pngtrans.c */; };
+		76734F18221273F8008CFD95 /* pngwrite.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F02221273F8008CFD95 /* pngwrite.c */; };
+		76734F19221273F8008CFD95 /* pngasmrd.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F03221273F8008CFD95 /* pngasmrd.h */; };
+		76734F1A221273F8008CFD95 /* pngvcrd.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F04221273F8008CFD95 /* pngvcrd.c */; };
+		76734F1B221273F8008CFD95 /* png.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F05221273F8008CFD95 /* png.h */; };
+		76734F1C221273F8008CFD95 /* pngwutil.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F06221273F8008CFD95 /* pngwutil.c */; };
+		76734F1D221273F8008CFD95 /* pngwtran.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F07221273F8008CFD95 /* pngwtran.c */; };
+		76734F1E221273F8008CFD95 /* pnggccrd.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F08221273F8008CFD95 /* pnggccrd.c */; };
+		76734F1F221273F8008CFD95 /* pngconf.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F09221273F8008CFD95 /* pngconf.h */; };
+		76734F20221273F8008CFD95 /* pngpread.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0A221273F8008CFD95 /* pngpread.c */; };
+		76734F21221273F8008CFD95 /* pngread.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0B221273F8008CFD95 /* pngread.c */; };
+		76734F22221273F8008CFD95 /* pngmem.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0C221273F8008CFD95 /* pngmem.c */; };
+		76734F23221273F8008CFD95 /* pngget.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0D221273F8008CFD95 /* pngget.c */; };
+		76734F24221273F8008CFD95 /* png.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0E221273F8008CFD95 /* png.c */; };
+		76734F25221273F8008CFD95 /* pngrtran.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F0F221273F8008CFD95 /* pngrtran.c */; };
+		76734F26221273F8008CFD95 /* pngrutil.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F10221273F8008CFD95 /* pngrutil.c */; };
+		76734F27221273F8008CFD95 /* pngset.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F11221273F8008CFD95 /* pngset.c */; };
+		76734F4422127434008CFD95 /* zutil.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F2922127434008CFD95 /* zutil.h */; };
+		76734F4522127434008CFD95 /* inftrees.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F2A22127434008CFD95 /* inftrees.h */; };
+		76734F4622127434008CFD95 /* inflate.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F2B22127434008CFD95 /* inflate.c */; };
+		76734F4722127434008CFD95 /* compress.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F2C22127434008CFD95 /* compress.c */; };
+		76734F4822127434008CFD95 /* deflate.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F2D22127434008CFD95 /* deflate.c */; };
+		76734F4922127434008CFD95 /* infutil.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F2E22127434008CFD95 /* infutil.c */; };
+		76734F4A22127434008CFD95 /* inffixed.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F2F22127434008CFD95 /* inffixed.h */; };
+		76734F4C22127434008CFD95 /* infcodes.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3122127434008CFD95 /* infcodes.h */; };
+		76734F4D22127434008CFD95 /* trees.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3222127434008CFD95 /* trees.h */; };
+		76734F4E22127434008CFD95 /* infblock.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3322127434008CFD95 /* infblock.c */; };
+		76734F4F22127434008CFD95 /* inffast.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3422127434008CFD95 /* inffast.h */; };
+		76734F5022127434008CFD95 /* crc32.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3522127434008CFD95 /* crc32.c */; };
+		76734F5122127434008CFD95 /* zutil.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3622127434008CFD95 /* zutil.c */; };
+		76734F5222127434008CFD95 /* deflate.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3722127434008CFD95 /* deflate.h */; };
+		76734F5322127434008CFD95 /* zlib.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3822127434008CFD95 /* zlib.h */; };
+		76734F5522127434008CFD95 /* inftrees.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3A22127434008CFD95 /* inftrees.c */; };
+		76734F5622127434008CFD95 /* uncompr.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3B22127434008CFD95 /* uncompr.c */; };
+		76734F5722127434008CFD95 /* infblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3C22127434008CFD95 /* infblock.h */; };
+		76734F5822127434008CFD95 /* trees.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3D22127434008CFD95 /* trees.c */; };
+		76734F5922127434008CFD95 /* infcodes.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F3E22127434008CFD95 /* infcodes.c */; };
+		76734F5A22127434008CFD95 /* infutil.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F3F22127434008CFD95 /* infutil.h */; };
+		76734F5B22127434008CFD95 /* gzio.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F4022127434008CFD95 /* gzio.c */; };
+		76734F5C22127434008CFD95 /* inffast.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F4122127434008CFD95 /* inffast.c */; };
+		76734F5D22127434008CFD95 /* adler32.c in Sources */ = {isa = PBXBuildFile; fileRef = 76734F4222127434008CFD95 /* adler32.c */; };
+		76734F5E22127434008CFD95 /* zconf.h in Headers */ = {isa = PBXBuildFile; fileRef = 76734F4322127434008CFD95 /* zconf.h */; };
+		7691CAEF20927C200029084C /* Meshing.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7691CAE520927C200029084C /* Meshing.framework */; };
+		7691CAF420927C200029084C /* MeshingTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 7691CAF320927C200029084C /* MeshingTests.m */; };
+		7691CAF620927C200029084C /* Meshing.h in Headers */ = {isa = PBXBuildFile; fileRef = 7691CAE820927C200029084C /* Meshing.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		7691CB1220927E6B0029084C /* MeshingOperation.h in Headers */ = {isa = PBXBuildFile; fileRef = 7691CB1020927E6B0029084C /* MeshingOperation.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		7691CB1320927E6B0029084C /* MeshingOperation.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7691CB1120927E6B0029084C /* MeshingOperation.mm */; };
+		76EBE55D20A2769C0052126A /* app_scan.ply in Resources */ = {isa = PBXBuildFile; fileRef = 76EBE55C20A2769C0052126A /* app_scan.ply */; };
+		76F402CF20A285DF002FE190 /* app_scan.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 76F402CE20A285DF002FE190 /* app_scan.jpg */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		7691CAF020927C200029084C /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 7691CADC20927C200029084C /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 7691CAE420927C200029084C;
+			remoteInfo = PoissonRecon;
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		762D5ABF22162C0B0066EE74 /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "include/$(PRODUCT_NAME)";
+			dstSubfolderSpec = 16;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		760D862D22127FE9008E1458 /* PoissonRecon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PoissonRecon.cpp; sourceTree = "<group>"; };
+		760D863A2212869C008E1458 /* SurfaceTrimmer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SurfaceTrimmer.cpp; sourceTree = "<group>"; };
+		7619BF1E220E243A003B2F5C /* PlyFile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PlyFile.cpp; sourceTree = "<group>"; };
+		7619BF1F220E243A003B2F5C /* CmdLineParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CmdLineParser.h; sourceTree = "<group>"; };
+		7619BF20220E243B003B2F5C /* Image.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Image.h; sourceTree = "<group>"; };
+		7619BF21220E243B003B2F5C /* PNG.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = PNG.inl; sourceTree = "<group>"; };
+		7619BF22220E243B003B2F5C /* FEMTree.Initialize.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.Initialize.inl; sourceTree = "<group>"; };
+		7619BF23220E243B003B2F5C /* Polynomial.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Polynomial.inl; sourceTree = "<group>"; };
+		7619BF24220E243B003B2F5C /* FEMTree.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.inl; sourceTree = "<group>"; };
+		7619BF25220E243B003B2F5C /* RegularTree.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = RegularTree.inl; sourceTree = "<group>"; };
+		7619BF26220E243B003B2F5C /* FEMTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FEMTree.h; sourceTree = "<group>"; };
+		7619BF27220E243B003B2F5C /* PlyFile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PlyFile.h; sourceTree = "<group>"; };
+		7619BF28220E243B003B2F5C /* FEMTree.System.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.System.inl; sourceTree = "<group>"; };
+		7619BF29220E243B003B2F5C /* Geometry.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Geometry.inl; sourceTree = "<group>"; };
+		7619BF2A220E243B003B2F5C /* FunctionData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FunctionData.h; sourceTree = "<group>"; };
+		7619BF2B220E243B003B2F5C /* MAT.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MAT.h; sourceTree = "<group>"; };
+		7619BF2C220E243B003B2F5C /* Window.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Window.h; sourceTree = "<group>"; };
+		7619BF2E220E243B003B2F5C /* PNG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PNG.h; sourceTree = "<group>"; };
+		7619BF2F220E243B003B2F5C /* Array.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Array.h; sourceTree = "<group>"; };
+		7619BF30220E243B003B2F5C /* MAT.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = MAT.inl; sourceTree = "<group>"; };
+		7619BF31220E243B003B2F5C /* CmdLineParser.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = CmdLineParser.inl; sourceTree = "<group>"; };
+		7619BF32220E243B003B2F5C /* BinaryNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BinaryNode.h; sourceTree = "<group>"; };
+		7619BF33220E243B003B2F5C /* Geometry.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Geometry.h; sourceTree = "<group>"; };
+		7619BF34220E243B003B2F5C /* RegularTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RegularTree.h; sourceTree = "<group>"; };
+		7619BF35220E243B003B2F5C /* SparseMatrixInterface.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = SparseMatrixInterface.inl; sourceTree = "<group>"; };
+		7619BF36220E243B003B2F5C /* FunctionData.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FunctionData.inl; sourceTree = "<group>"; };
+		7619BF37220E243B003B2F5C /* JPEG.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = JPEG.inl; sourceTree = "<group>"; };
+		7619BF38220E243B003B2F5C /* BSplineData.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = BSplineData.inl; sourceTree = "<group>"; };
+		7619BF39220E243B003B2F5C /* BMPStream.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = BMPStream.inl; sourceTree = "<group>"; };
+		7619BF3A220E243B003B2F5C /* BSplineData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BSplineData.h; sourceTree = "<group>"; };
+		7619BF3B220E243B003B2F5C /* MarchingCubes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MarchingCubes.h; sourceTree = "<group>"; };
+		7619BF3C220E243C003B2F5C /* FEMTree.SortedTreeNodes.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.SortedTreeNodes.inl; sourceTree = "<group>"; };
+		7619BF3D220E243C003B2F5C /* Array.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Array.inl; sourceTree = "<group>"; };
+		7619BF3E220E243C003B2F5C /* SparseMatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SparseMatrix.h; sourceTree = "<group>"; };
+		7619BF3F220E243C003B2F5C /* Polynomial.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Polynomial.h; sourceTree = "<group>"; };
+		7619BF41220E243C003B2F5C /* FEMTree.WeightedSamples.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.WeightedSamples.inl; sourceTree = "<group>"; };
+		7619BF42220E243C003B2F5C /* Window.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Window.inl; sourceTree = "<group>"; };
+		7619BF43220E243C003B2F5C /* Allocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Allocator.h; sourceTree = "<group>"; };
+		7619BF44220E243C003B2F5C /* MyMiscellany.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MyMiscellany.h; sourceTree = "<group>"; };
+		7619BF45220E243C003B2F5C /* PPolynomial.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PPolynomial.h; sourceTree = "<group>"; };
+		7619BF46220E243C003B2F5C /* PPolynomial.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = PPolynomial.inl; sourceTree = "<group>"; };
+		7619BF47220E243C003B2F5C /* Ply.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Ply.h; sourceTree = "<group>"; };
+		7619BF48220E243C003B2F5C /* PointStream.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = PointStream.inl; sourceTree = "<group>"; };
+		7619BF49220E243C003B2F5C /* LinearSolvers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LinearSolvers.h; sourceTree = "<group>"; };
+		7619BF4A220E243C003B2F5C /* SparseMatrix.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = SparseMatrix.inl; sourceTree = "<group>"; };
+		7619BF4B220E243C003B2F5C /* SparseMatrixInterface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SparseMatrixInterface.h; sourceTree = "<group>"; };
+		7619BF4C220E243C003B2F5C /* JPEG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JPEG.h; sourceTree = "<group>"; };
+		7619BF4D220E243D003B2F5C /* FEMTree.IsoSurface.specialized.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.IsoSurface.specialized.inl; sourceTree = "<group>"; };
+		7619BF4E220E243D003B2F5C /* PointStream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PointStream.h; sourceTree = "<group>"; };
+		7619BF4F220E243D003B2F5C /* PointStreamData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PointStreamData.h; sourceTree = "<group>"; };
+		7619BF50220E243D003B2F5C /* Factor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Factor.h; sourceTree = "<group>"; };
+		7619BF51220E243D003B2F5C /* FEMTree.Evaluation.inl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = FEMTree.Evaluation.inl; sourceTree = "<group>"; };
+		762D5A922214ACBA0066EE74 /* ExecuteEntryFunctions.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = ExecuteEntryFunctions.cpp; sourceTree = "<group>"; };
+		762D5A932214ACBA0066EE74 /* ExecuteEntryFunctions.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = ExecuteEntryFunctions.hpp; sourceTree = "<group>"; };
+		762D5A992214BCC50066EE74 /* Parameters.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Parameters.hpp; sourceTree = "<group>"; };
+		762D5AC122162C0B0066EE74 /* libmeshing.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libmeshing.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		7664E3D72213436600BC5377 /* PoissonReconExecute.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = PoissonReconExecute.hpp; sourceTree = "<group>"; };
+		7664E3DD22138EE600BC5377 /* SurfaceTrimmerExecute.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SurfaceTrimmerExecute.hpp; sourceTree = "<group>"; };
+		76734E86221273D9008CFD95 /* jmemmgr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jmemmgr.cpp; sourceTree = "<group>"; };
+		76734E87221273D9008CFD95 /* jdhuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdhuff.cpp; sourceTree = "<group>"; };
+		76734E88221273D9008CFD95 /* jpeglib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jpeglib.h; sourceTree = "<group>"; };
+		76734E89221273D9008CFD95 /* jcapimin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcapimin.cpp; sourceTree = "<group>"; };
+		76734E8A221273D9008CFD95 /* jcprepct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcprepct.cpp; sourceTree = "<group>"; };
+		76734E8B221273D9008CFD95 /* jidctred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jidctred.cpp; sourceTree = "<group>"; };
+		76734E8C221273D9008CFD95 /* jerror.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jerror.h; sourceTree = "<group>"; };
+		76734E8D221273D9008CFD95 /* jidctflt.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jidctflt.cpp; sourceTree = "<group>"; };
+		76734E8E221273D9008CFD95 /* jdmainct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdmainct.cpp; sourceTree = "<group>"; };
+		76734E8F221273D9008CFD95 /* jdphuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdphuff.cpp; sourceTree = "<group>"; };
+		76734E90221273D9008CFD95 /* jdmaster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdmaster.cpp; sourceTree = "<group>"; };
+		76734E91221273D9008CFD95 /* jcapistd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcapistd.cpp; sourceTree = "<group>"; };
+		76734E92221273D9008CFD95 /* jcmainct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcmainct.cpp; sourceTree = "<group>"; };
+		76734E93221273D9008CFD95 /* jdpostct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdpostct.cpp; sourceTree = "<group>"; };
+		76734E94221273D9008CFD95 /* jdatadst.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdatadst.cpp; sourceTree = "<group>"; };
+		76734E96221273D9008CFD95 /* jcomapi.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcomapi.cpp; sourceTree = "<group>"; };
+		76734E97221273D9008CFD95 /* jmemnobs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jmemnobs.cpp; sourceTree = "<group>"; };
+		76734E98221273D9008CFD95 /* jcphuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcphuff.cpp; sourceTree = "<group>"; };
+		76734E99221273D9008CFD95 /* jdapistd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdapistd.cpp; sourceTree = "<group>"; };
+		76734E9A221273D9008CFD95 /* jcmaster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcmaster.cpp; sourceTree = "<group>"; };
+		76734E9B221273D9008CFD95 /* jidctint.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jidctint.cpp; sourceTree = "<group>"; };
+		76734E9C221273D9008CFD95 /* jdapimin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdapimin.cpp; sourceTree = "<group>"; };
+		76734E9D221273D9008CFD95 /* jcparam.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcparam.cpp; sourceTree = "<group>"; };
+		76734E9E221273D9008CFD95 /* jidctfst.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jidctfst.cpp; sourceTree = "<group>"; };
+		76734E9F221273D9008CFD95 /* jquant2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jquant2.cpp; sourceTree = "<group>"; };
+		76734EA0221273D9008CFD95 /* jquant1.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jquant1.cpp; sourceTree = "<group>"; };
+		76734EA1221273D9008CFD95 /* jccolor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jccolor.cpp; sourceTree = "<group>"; };
+		76734EA2221273D9008CFD95 /* jctrans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jctrans.cpp; sourceTree = "<group>"; };
+		76734EA3221273D9008CFD95 /* jdhuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jdhuff.h; sourceTree = "<group>"; };
+		76734EA4221273D9008CFD95 /* jmemsys.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jmemsys.h; sourceTree = "<group>"; };
+		76734EA5221273D9008CFD95 /* jutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jutils.cpp; sourceTree = "<group>"; };
+		76734EA6221273D9008CFD95 /* jdatasrc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdatasrc.cpp; sourceTree = "<group>"; };
+		76734EA7221273D9008CFD95 /* jdinput.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdinput.cpp; sourceTree = "<group>"; };
+		76734EA8221273D9008CFD95 /* jcdctmgr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcdctmgr.cpp; sourceTree = "<group>"; };
+		76734EA9221273D9008CFD95 /* jdmarker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdmarker.cpp; sourceTree = "<group>"; };
+		76734EAA221273D9008CFD95 /* jchuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jchuff.cpp; sourceTree = "<group>"; };
+		76734EAB221273D9008CFD95 /* jconfig.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jconfig.h; sourceTree = "<group>"; };
+		76734EAC221273D9008CFD95 /* jdcoefct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdcoefct.cpp; sourceTree = "<group>"; };
+		76734EAD221273D9008CFD95 /* jmorecfg.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jmorecfg.h; sourceTree = "<group>"; };
+		76734EAE221273D9008CFD95 /* jdct.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jdct.h; sourceTree = "<group>"; };
+		76734EAF221273D9008CFD95 /* jfdctflt.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jfdctflt.cpp; sourceTree = "<group>"; };
+		76734EB0221273D9008CFD95 /* jversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jversion.h; sourceTree = "<group>"; };
+		76734EB1221273D9008CFD95 /* jcsample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcsample.cpp; sourceTree = "<group>"; };
+		76734EB2221273D9008CFD95 /* jinclude.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jinclude.h; sourceTree = "<group>"; };
+		76734EB3221273D9008CFD95 /* jfdctint.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jfdctint.cpp; sourceTree = "<group>"; };
+		76734EB4221273D9008CFD95 /* jccoefct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jccoefct.cpp; sourceTree = "<group>"; };
+		76734EB5221273D9008CFD95 /* jdsample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdsample.cpp; sourceTree = "<group>"; };
+		76734EB6221273D9008CFD95 /* jcinit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcinit.cpp; sourceTree = "<group>"; };
+		76734EB7221273D9008CFD95 /* jdmerge.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdmerge.cpp; sourceTree = "<group>"; };
+		76734EB8221273D9008CFD95 /* jerror.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jerror.cpp; sourceTree = "<group>"; };
+		76734EB9221273D9008CFD95 /* jdcolor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdcolor.cpp; sourceTree = "<group>"; };
+		76734EBA221273D9008CFD95 /* jdtrans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jdtrans.cpp; sourceTree = "<group>"; };
+		76734EBB221273D9008CFD95 /* jddctmgr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jddctmgr.cpp; sourceTree = "<group>"; };
+		76734EBC221273D9008CFD95 /* jfdctfst.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jfdctfst.cpp; sourceTree = "<group>"; };
+		76734EBD221273D9008CFD95 /* jchuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jchuff.h; sourceTree = "<group>"; };
+		76734EBE221273D9008CFD95 /* jcmarker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = jcmarker.cpp; sourceTree = "<group>"; };
+		76734EBF221273D9008CFD95 /* jpegint.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = jpegint.h; sourceTree = "<group>"; };
+		76734EFD221273F8008CFD95 /* pngrio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngrio.c; sourceTree = "<group>"; };
+		76734EFE221273F8008CFD95 /* pngerror.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngerror.c; sourceTree = "<group>"; };
+		76734EFF221273F8008CFD95 /* pngwio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngwio.c; sourceTree = "<group>"; };
+		76734F00221273F8008CFD95 /* pngtrans.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngtrans.c; sourceTree = "<group>"; };
+		76734F02221273F8008CFD95 /* pngwrite.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngwrite.c; sourceTree = "<group>"; };
+		76734F03221273F8008CFD95 /* pngasmrd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pngasmrd.h; sourceTree = "<group>"; };
+		76734F04221273F8008CFD95 /* pngvcrd.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngvcrd.c; sourceTree = "<group>"; };
+		76734F05221273F8008CFD95 /* png.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = png.h; sourceTree = "<group>"; };
+		76734F06221273F8008CFD95 /* pngwutil.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngwutil.c; sourceTree = "<group>"; };
+		76734F07221273F8008CFD95 /* pngwtran.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngwtran.c; sourceTree = "<group>"; };
+		76734F08221273F8008CFD95 /* pnggccrd.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pnggccrd.c; sourceTree = "<group>"; };
+		76734F09221273F8008CFD95 /* pngconf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pngconf.h; sourceTree = "<group>"; };
+		76734F0A221273F8008CFD95 /* pngpread.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngpread.c; sourceTree = "<group>"; };
+		76734F0B221273F8008CFD95 /* pngread.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngread.c; sourceTree = "<group>"; };
+		76734F0C221273F8008CFD95 /* pngmem.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngmem.c; sourceTree = "<group>"; };
+		76734F0D221273F8008CFD95 /* pngget.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngget.c; sourceTree = "<group>"; };
+		76734F0E221273F8008CFD95 /* png.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = png.c; sourceTree = "<group>"; };
+		76734F0F221273F8008CFD95 /* pngrtran.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngrtran.c; sourceTree = "<group>"; };
+		76734F10221273F8008CFD95 /* pngrutil.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngrutil.c; sourceTree = "<group>"; };
+		76734F11221273F8008CFD95 /* pngset.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pngset.c; sourceTree = "<group>"; };
+		76734F2922127434008CFD95 /* zutil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = zutil.h; sourceTree = "<group>"; };
+		76734F2A22127434008CFD95 /* inftrees.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = inftrees.h; sourceTree = "<group>"; };
+		76734F2B22127434008CFD95 /* inflate.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = inflate.c; sourceTree = "<group>"; };
+		76734F2C22127434008CFD95 /* compress.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = compress.c; sourceTree = "<group>"; };
+		76734F2D22127434008CFD95 /* deflate.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = deflate.c; sourceTree = "<group>"; };
+		76734F2E22127434008CFD95 /* infutil.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = infutil.c; sourceTree = "<group>"; };
+		76734F2F22127434008CFD95 /* inffixed.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = inffixed.h; sourceTree = "<group>"; };
+		76734F3122127434008CFD95 /* infcodes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = infcodes.h; sourceTree = "<group>"; };
+		76734F3222127434008CFD95 /* trees.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trees.h; sourceTree = "<group>"; };
+		76734F3322127434008CFD95 /* infblock.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = infblock.c; sourceTree = "<group>"; };
+		76734F3422127434008CFD95 /* inffast.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = inffast.h; sourceTree = "<group>"; };
+		76734F3522127434008CFD95 /* crc32.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = crc32.c; sourceTree = "<group>"; };
+		76734F3622127434008CFD95 /* zutil.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = zutil.c; sourceTree = "<group>"; };
+		76734F3722127434008CFD95 /* deflate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deflate.h; sourceTree = "<group>"; };
+		76734F3822127434008CFD95 /* zlib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = zlib.h; sourceTree = "<group>"; };
+		76734F3A22127434008CFD95 /* inftrees.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = inftrees.c; sourceTree = "<group>"; };
+		76734F3B22127434008CFD95 /* uncompr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = uncompr.c; sourceTree = "<group>"; };
+		76734F3C22127434008CFD95 /* infblock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = infblock.h; sourceTree = "<group>"; };
+		76734F3D22127434008CFD95 /* trees.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = trees.c; sourceTree = "<group>"; };
+		76734F3E22127434008CFD95 /* infcodes.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = infcodes.c; sourceTree = "<group>"; };
+		76734F3F22127434008CFD95 /* infutil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = infutil.h; sourceTree = "<group>"; };
+		76734F4022127434008CFD95 /* gzio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = gzio.c; sourceTree = "<group>"; };
+		76734F4122127434008CFD95 /* inffast.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = inffast.c; sourceTree = "<group>"; };
+		76734F4222127434008CFD95 /* adler32.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = adler32.c; sourceTree = "<group>"; };
+		76734F4322127434008CFD95 /* zconf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = zconf.h; sourceTree = "<group>"; };
+		7691CAE520927C200029084C /* Meshing.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Meshing.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		7691CAE820927C200029084C /* Meshing.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Meshing.h; sourceTree = "<group>"; };
+		7691CAE920927C200029084C /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		7691CAEE20927C200029084C /* MeshingTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = MeshingTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
+		7691CAF320927C200029084C /* MeshingTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = MeshingTests.m; sourceTree = "<group>"; };
+		7691CAF520927C200029084C /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		7691CB1020927E6B0029084C /* MeshingOperation.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MeshingOperation.h; sourceTree = "<group>"; };
+		7691CB1120927E6B0029084C /* MeshingOperation.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MeshingOperation.mm; sourceTree = "<group>"; };
+		76EBE55C20A2769C0052126A /* app_scan.ply */ = {isa = PBXFileReference; lastKnownFileType = file; path = app_scan.ply; sourceTree = "<group>"; };
+		76F402CE20A285DF002FE190 /* app_scan.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = app_scan.jpg; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		762D5ABE22162C0B0066EE74 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		7691CAE120927C200029084C /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		7691CAEB20927C200029084C /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				7691CAEF20927C200029084C /* Meshing.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		76734E84221273D9008CFD95 /* JPEG */ = {
+			isa = PBXGroup;
+			children = (
+				76734E89221273D9008CFD95 /* jcapimin.cpp */,
+				76734E91221273D9008CFD95 /* jcapistd.cpp */,
+				76734EB4221273D9008CFD95 /* jccoefct.cpp */,
+				76734EA1221273D9008CFD95 /* jccolor.cpp */,
+				76734EA8221273D9008CFD95 /* jcdctmgr.cpp */,
+				76734EAA221273D9008CFD95 /* jchuff.cpp */,
+				76734EBD221273D9008CFD95 /* jchuff.h */,
+				76734EB6221273D9008CFD95 /* jcinit.cpp */,
+				76734E92221273D9008CFD95 /* jcmainct.cpp */,
+				76734EBE221273D9008CFD95 /* jcmarker.cpp */,
+				76734E9A221273D9008CFD95 /* jcmaster.cpp */,
+				76734E96221273D9008CFD95 /* jcomapi.cpp */,
+				76734EAB221273D9008CFD95 /* jconfig.h */,
+				76734E9D221273D9008CFD95 /* jcparam.cpp */,
+				76734E98221273D9008CFD95 /* jcphuff.cpp */,
+				76734E8A221273D9008CFD95 /* jcprepct.cpp */,
+				76734EB1221273D9008CFD95 /* jcsample.cpp */,
+				76734EA2221273D9008CFD95 /* jctrans.cpp */,
+				76734E9C221273D9008CFD95 /* jdapimin.cpp */,
+				76734E99221273D9008CFD95 /* jdapistd.cpp */,
+				76734E94221273D9008CFD95 /* jdatadst.cpp */,
+				76734EA6221273D9008CFD95 /* jdatasrc.cpp */,
+				76734EAC221273D9008CFD95 /* jdcoefct.cpp */,
+				76734EB9221273D9008CFD95 /* jdcolor.cpp */,
+				76734EAE221273D9008CFD95 /* jdct.h */,
+				76734EBB221273D9008CFD95 /* jddctmgr.cpp */,
+				76734E87221273D9008CFD95 /* jdhuff.cpp */,
+				76734EA3221273D9008CFD95 /* jdhuff.h */,
+				76734EA7221273D9008CFD95 /* jdinput.cpp */,
+				76734E8E221273D9008CFD95 /* jdmainct.cpp */,
+				76734EA9221273D9008CFD95 /* jdmarker.cpp */,
+				76734E90221273D9008CFD95 /* jdmaster.cpp */,
+				76734EB7221273D9008CFD95 /* jdmerge.cpp */,
+				76734E8F221273D9008CFD95 /* jdphuff.cpp */,
+				76734E93221273D9008CFD95 /* jdpostct.cpp */,
+				76734EB5221273D9008CFD95 /* jdsample.cpp */,
+				76734EBA221273D9008CFD95 /* jdtrans.cpp */,
+				76734EB8221273D9008CFD95 /* jerror.cpp */,
+				76734E8C221273D9008CFD95 /* jerror.h */,
+				76734EAF221273D9008CFD95 /* jfdctflt.cpp */,
+				76734EBC221273D9008CFD95 /* jfdctfst.cpp */,
+				76734EB3221273D9008CFD95 /* jfdctint.cpp */,
+				76734E8D221273D9008CFD95 /* jidctflt.cpp */,
+				76734E9E221273D9008CFD95 /* jidctfst.cpp */,
+				76734E9B221273D9008CFD95 /* jidctint.cpp */,
+				76734E8B221273D9008CFD95 /* jidctred.cpp */,
+				76734EB2221273D9008CFD95 /* jinclude.h */,
+				76734E86221273D9008CFD95 /* jmemmgr.cpp */,
+				76734E97221273D9008CFD95 /* jmemnobs.cpp */,
+				76734EA4221273D9008CFD95 /* jmemsys.h */,
+				76734EAD221273D9008CFD95 /* jmorecfg.h */,
+				76734EBF221273D9008CFD95 /* jpegint.h */,
+				76734E88221273D9008CFD95 /* jpeglib.h */,
+				76734EA0221273D9008CFD95 /* jquant1.cpp */,
+				76734E9F221273D9008CFD95 /* jquant2.cpp */,
+				76734EA5221273D9008CFD95 /* jutils.cpp */,
+				76734EB0221273D9008CFD95 /* jversion.h */,
+			);
+			name = JPEG;
+			path = ../JPEG;
+			sourceTree = "<group>";
+		};
+		76734EFB221273F8008CFD95 /* PNG */ = {
+			isa = PBXGroup;
+			children = (
+				76734F0E221273F8008CFD95 /* png.c */,
+				76734F05221273F8008CFD95 /* png.h */,
+				76734F03221273F8008CFD95 /* pngasmrd.h */,
+				76734F09221273F8008CFD95 /* pngconf.h */,
+				76734EFE221273F8008CFD95 /* pngerror.c */,
+				76734F08221273F8008CFD95 /* pnggccrd.c */,
+				76734F0D221273F8008CFD95 /* pngget.c */,
+				76734F0C221273F8008CFD95 /* pngmem.c */,
+				76734F0A221273F8008CFD95 /* pngpread.c */,
+				76734F0B221273F8008CFD95 /* pngread.c */,
+				76734EFD221273F8008CFD95 /* pngrio.c */,
+				76734F0F221273F8008CFD95 /* pngrtran.c */,
+				76734F10221273F8008CFD95 /* pngrutil.c */,
+				76734F11221273F8008CFD95 /* pngset.c */,
+				76734F00221273F8008CFD95 /* pngtrans.c */,
+				76734F04221273F8008CFD95 /* pngvcrd.c */,
+				76734EFF221273F8008CFD95 /* pngwio.c */,
+				76734F02221273F8008CFD95 /* pngwrite.c */,
+				76734F07221273F8008CFD95 /* pngwtran.c */,
+				76734F06221273F8008CFD95 /* pngwutil.c */,
+			);
+			name = PNG;
+			path = ../PNG;
+			sourceTree = "<group>";
+		};
+		76734F2822127434008CFD95 /* ZLIB */ = {
+			isa = PBXGroup;
+			children = (
+				76734F4222127434008CFD95 /* adler32.c */,
+				76734F2C22127434008CFD95 /* compress.c */,
+				76734F3522127434008CFD95 /* crc32.c */,
+				76734F2D22127434008CFD95 /* deflate.c */,
+				76734F3722127434008CFD95 /* deflate.h */,
+				76734F4022127434008CFD95 /* gzio.c */,
+				76734F3322127434008CFD95 /* infblock.c */,
+				76734F3C22127434008CFD95 /* infblock.h */,
+				76734F3E22127434008CFD95 /* infcodes.c */,
+				76734F3122127434008CFD95 /* infcodes.h */,
+				76734F4122127434008CFD95 /* inffast.c */,
+				76734F3422127434008CFD95 /* inffast.h */,
+				76734F2F22127434008CFD95 /* inffixed.h */,
+				76734F2B22127434008CFD95 /* inflate.c */,
+				76734F3A22127434008CFD95 /* inftrees.c */,
+				76734F2A22127434008CFD95 /* inftrees.h */,
+				76734F2E22127434008CFD95 /* infutil.c */,
+				76734F3F22127434008CFD95 /* infutil.h */,
+				76734F3D22127434008CFD95 /* trees.c */,
+				76734F3222127434008CFD95 /* trees.h */,
+				76734F3B22127434008CFD95 /* uncompr.c */,
+				76734F4322127434008CFD95 /* zconf.h */,
+				76734F3822127434008CFD95 /* zlib.h */,
+				76734F3622127434008CFD95 /* zutil.c */,
+				76734F2922127434008CFD95 /* zutil.h */,
+			);
+			name = ZLIB;
+			path = ../ZLIB;
+			sourceTree = "<group>";
+		};
+		7691CADB20927C200029084C = {
+			isa = PBXGroup;
+			children = (
+				76EBE4FC20A25B870052126A /* Src */,
+				76734EFB221273F8008CFD95 /* PNG */,
+				76734E84221273D9008CFD95 /* JPEG */,
+				76734F2822127434008CFD95 /* ZLIB */,
+				7691CAE720927C200029084C /* Meshing */,
+				7691CAF220927C200029084C /* MeshingTests */,
+				7691CAE620927C200029084C /* Products */,
+				76EBE55520A2611E0052126A /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		7691CAE620927C200029084C /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				7691CAE520927C200029084C /* Meshing.framework */,
+				7691CAEE20927C200029084C /* MeshingTests.xctest */,
+				762D5AC122162C0B0066EE74 /* libmeshing.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		7691CAE720927C200029084C /* Meshing */ = {
+			isa = PBXGroup;
+			children = (
+				7691CAE920927C200029084C /* Info.plist */,
+				7691CAE820927C200029084C /* Meshing.h */,
+				762D5A992214BCC50066EE74 /* Parameters.hpp */,
+				7691CB1020927E6B0029084C /* MeshingOperation.h */,
+				7691CB1120927E6B0029084C /* MeshingOperation.mm */,
+				762D5A932214ACBA0066EE74 /* ExecuteEntryFunctions.hpp */,
+				762D5A922214ACBA0066EE74 /* ExecuteEntryFunctions.cpp */,
+				7664E3D72213436600BC5377 /* PoissonReconExecute.hpp */,
+				7664E3DD22138EE600BC5377 /* SurfaceTrimmerExecute.hpp */,
+			);
+			path = Meshing;
+			sourceTree = "<group>";
+		};
+		7691CAF220927C200029084C /* MeshingTests */ = {
+			isa = PBXGroup;
+			children = (
+				76EBE55C20A2769C0052126A /* app_scan.ply */,
+				76F402CE20A285DF002FE190 /* app_scan.jpg */,
+				7691CAF320927C200029084C /* MeshingTests.m */,
+				7691CAF520927C200029084C /* Info.plist */,
+			);
+			path = MeshingTests;
+			sourceTree = "<group>";
+		};
+		76EBE4FC20A25B870052126A /* Src */ = {
+			isa = PBXGroup;
+			children = (
+				760D862D22127FE9008E1458 /* PoissonRecon.cpp */,
+				760D863A2212869C008E1458 /* SurfaceTrimmer.cpp */,
+				7619BF43220E243C003B2F5C /* Allocator.h */,
+				7619BF2F220E243B003B2F5C /* Array.h */,
+				7619BF3D220E243C003B2F5C /* Array.inl */,
+				7619BF32220E243B003B2F5C /* BinaryNode.h */,
+				7619BF39220E243B003B2F5C /* BMPStream.inl */,
+				7619BF3A220E243B003B2F5C /* BSplineData.h */,
+				7619BF38220E243B003B2F5C /* BSplineData.inl */,
+				7619BF1F220E243A003B2F5C /* CmdLineParser.h */,
+				7619BF31220E243B003B2F5C /* CmdLineParser.inl */,
+				7619BF50220E243D003B2F5C /* Factor.h */,
+				7619BF51220E243D003B2F5C /* FEMTree.Evaluation.inl */,
+				7619BF26220E243B003B2F5C /* FEMTree.h */,
+				7619BF22220E243B003B2F5C /* FEMTree.Initialize.inl */,
+				7619BF24220E243B003B2F5C /* FEMTree.inl */,
+				7619BF4D220E243D003B2F5C /* FEMTree.IsoSurface.specialized.inl */,
+				7619BF3C220E243C003B2F5C /* FEMTree.SortedTreeNodes.inl */,
+				7619BF28220E243B003B2F5C /* FEMTree.System.inl */,
+				7619BF41220E243C003B2F5C /* FEMTree.WeightedSamples.inl */,
+				7619BF2A220E243B003B2F5C /* FunctionData.h */,
+				7619BF36220E243B003B2F5C /* FunctionData.inl */,
+				7619BF33220E243B003B2F5C /* Geometry.h */,
+				7619BF29220E243B003B2F5C /* Geometry.inl */,
+				7619BF20220E243B003B2F5C /* Image.h */,
+				7619BF4C220E243C003B2F5C /* JPEG.h */,
+				7619BF37220E243B003B2F5C /* JPEG.inl */,
+				7619BF49220E243C003B2F5C /* LinearSolvers.h */,
+				7619BF3B220E243B003B2F5C /* MarchingCubes.h */,
+				7619BF2B220E243B003B2F5C /* MAT.h */,
+				7619BF30220E243B003B2F5C /* MAT.inl */,
+				7619BF44220E243C003B2F5C /* MyMiscellany.h */,
+				7619BF47220E243C003B2F5C /* Ply.h */,
+				7619BF1E220E243A003B2F5C /* PlyFile.cpp */,
+				7619BF27220E243B003B2F5C /* PlyFile.h */,
+				7619BF2E220E243B003B2F5C /* PNG.h */,
+				7619BF21220E243B003B2F5C /* PNG.inl */,
+				7619BF4E220E243D003B2F5C /* PointStream.h */,
+				7619BF48220E243C003B2F5C /* PointStream.inl */,
+				7619BF4F220E243D003B2F5C /* PointStreamData.h */,
+				7619BF3F220E243C003B2F5C /* Polynomial.h */,
+				7619BF23220E243B003B2F5C /* Polynomial.inl */,
+				7619BF45220E243C003B2F5C /* PPolynomial.h */,
+				7619BF46220E243C003B2F5C /* PPolynomial.inl */,
+				7619BF34220E243B003B2F5C /* RegularTree.h */,
+				7619BF25220E243B003B2F5C /* RegularTree.inl */,
+				7619BF3E220E243C003B2F5C /* SparseMatrix.h */,
+				7619BF4A220E243C003B2F5C /* SparseMatrix.inl */,
+				7619BF4B220E243C003B2F5C /* SparseMatrixInterface.h */,
+				7619BF35220E243B003B2F5C /* SparseMatrixInterface.inl */,
+				7619BF2C220E243B003B2F5C /* Window.h */,
+				7619BF42220E243C003B2F5C /* Window.inl */,
+			);
+			name = Src;
+			path = ../Src;
+			sourceTree = "<group>";
+		};
+		76EBE55520A2611E0052126A /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		7691CAE220927C200029084C /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				76734F4D22127434008CFD95 /* trees.h in Headers */,
+				7619BF80220E243D003B2F5C /* JPEG.h in Headers */,
+				7619BF68220E243D003B2F5C /* RegularTree.h in Headers */,
+				7619BF7F220E243D003B2F5C /* SparseMatrixInterface.h in Headers */,
+				7619BF63220E243D003B2F5C /* Array.h in Headers */,
+				76734EC7221273D9008CFD95 /* jerror.h in Headers */,
+				76734EE8221273D9008CFD95 /* jmorecfg.h in Headers */,
+				76734EED221273D9008CFD95 /* jinclude.h in Headers */,
+				76734EE6221273D9008CFD95 /* jconfig.h in Headers */,
+				7619BF77220E243D003B2F5C /* Allocator.h in Headers */,
+				762D5A9A2214BCC50066EE74 /* Parameters.hpp in Headers */,
+				76734F4A22127434008CFD95 /* inffixed.h in Headers */,
+				7619BF62220E243D003B2F5C /* PNG.h in Headers */,
+				76734F4522127434008CFD95 /* inftrees.h in Headers */,
+				7619BF66220E243D003B2F5C /* BinaryNode.h in Headers */,
+				7664E3DF22138EE600BC5377 /* SurfaceTrimmerExecute.hpp in Headers */,
+				7619BF73220E243D003B2F5C /* Polynomial.h in Headers */,
+				7619BF7B220E243D003B2F5C /* Ply.h in Headers */,
+				7619BF6F220E243D003B2F5C /* MarchingCubes.h in Headers */,
+				76734F19221273F8008CFD95 /* pngasmrd.h in Headers */,
+				76734F5A22127434008CFD95 /* infutil.h in Headers */,
+				76734F4C22127434008CFD95 /* infcodes.h in Headers */,
+				76734F5E22127434008CFD95 /* zconf.h in Headers */,
+				76734F5222127434008CFD95 /* deflate.h in Headers */,
+				76734EDF221273D9008CFD95 /* jmemsys.h in Headers */,
+				762D5A952214ACBA0066EE74 /* ExecuteEntryFunctions.hpp in Headers */,
+				76734EFA221273D9008CFD95 /* jpegint.h in Headers */,
+				76734F1B221273F8008CFD95 /* png.h in Headers */,
+				7691CB1220927E6B0029084C /* MeshingOperation.h in Headers */,
+				7619BF53220E243D003B2F5C /* CmdLineParser.h in Headers */,
+				7619BF67220E243D003B2F5C /* Geometry.h in Headers */,
+				7619BF82220E243D003B2F5C /* PointStream.h in Headers */,
+				7619BF5B220E243D003B2F5C /* PlyFile.h in Headers */,
+				76734F4F22127434008CFD95 /* inffast.h in Headers */,
+				7619BF60220E243D003B2F5C /* Window.h in Headers */,
+				76734EEB221273D9008CFD95 /* jversion.h in Headers */,
+				7619BF7D220E243D003B2F5C /* LinearSolvers.h in Headers */,
+				7619BF72220E243D003B2F5C /* SparseMatrix.h in Headers */,
+				7619BF6E220E243D003B2F5C /* BSplineData.h in Headers */,
+				76734EDE221273D9008CFD95 /* jdhuff.h in Headers */,
+				7619BF5E220E243D003B2F5C /* FunctionData.h in Headers */,
+				7619BF84220E243D003B2F5C /* Factor.h in Headers */,
+				76734F1F221273F8008CFD95 /* pngconf.h in Headers */,
+				7691CAF620927C200029084C /* Meshing.h in Headers */,
+				76734F5722127434008CFD95 /* infblock.h in Headers */,
+				7619BF5F220E243D003B2F5C /* MAT.h in Headers */,
+				76734EC3221273D9008CFD95 /* jpeglib.h in Headers */,
+				7619BF83220E243D003B2F5C /* PointStreamData.h in Headers */,
+				76734EF8221273D9008CFD95 /* jchuff.h in Headers */,
+				76734F5322127434008CFD95 /* zlib.h in Headers */,
+				7619BF54220E243D003B2F5C /* Image.h in Headers */,
+				7619BF5A220E243D003B2F5C /* FEMTree.h in Headers */,
+				76734EE9221273D9008CFD95 /* jdct.h in Headers */,
+				76734F4422127434008CFD95 /* zutil.h in Headers */,
+				7619BF78220E243D003B2F5C /* MyMiscellany.h in Headers */,
+				7664E3D82213436600BC5377 /* PoissonReconExecute.hpp in Headers */,
+				7619BF79220E243D003B2F5C /* PPolynomial.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		762D5AC022162C0B0066EE74 /* meshing */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 762D5AC922162C0B0066EE74 /* Build configuration list for PBXNativeTarget "meshing" */;
+			buildPhases = (
+				762D5ABD22162C0B0066EE74 /* Sources */,
+				762D5ABE22162C0B0066EE74 /* Frameworks */,
+				762D5ABF22162C0B0066EE74 /* CopyFiles */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = meshing;
+			productName = libmeshing;
+			productReference = 762D5AC122162C0B0066EE74 /* libmeshing.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+		7691CAE420927C200029084C /* Meshing */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 7691CAF920927C200029084C /* Build configuration list for PBXNativeTarget "Meshing" */;
+			buildPhases = (
+				7691CAE020927C200029084C /* Sources */,
+				7691CAE220927C200029084C /* Headers */,
+				7691CAE320927C200029084C /* Resources */,
+				7691CAE120927C200029084C /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = Meshing;
+			productName = PoissonRecon;
+			productReference = 7691CAE520927C200029084C /* Meshing.framework */;
+			productType = "com.apple.product-type.framework";
+		};
+		7691CAED20927C200029084C /* MeshingTests */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 7691CAFC20927C200029084C /* Build configuration list for PBXNativeTarget "MeshingTests" */;
+			buildPhases = (
+				7691CAEA20927C200029084C /* Sources */,
+				7691CAEB20927C200029084C /* Frameworks */,
+				7691CAEC20927C200029084C /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				7691CAF120927C200029084C /* PBXTargetDependency */,
+			);
+			name = MeshingTests;
+			productName = PoissonReconTests;
+			productReference = 7691CAEE20927C200029084C /* MeshingTests.xctest */;
+			productType = "com.apple.product-type.bundle.unit-test";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		7691CADC20927C200029084C /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0930;
+				LastUpgradeCheck = 1020;
+				ORGANIZATIONNAME = "Standard Cyborg";
+				TargetAttributes = {
+					762D5AC022162C0B0066EE74 = {
+						CreatedOnToolsVersion = 10.1;
+					};
+					7691CAE420927C200029084C = {
+						CreatedOnToolsVersion = 9.3;
+					};
+					7691CAED20927C200029084C = {
+						CreatedOnToolsVersion = 9.3;
+					};
+				};
+			};
+			buildConfigurationList = 7691CADF20927C200029084C /* Build configuration list for PBXProject "PoissonRecon" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 7691CADB20927C200029084C;
+			productRefGroup = 7691CAE620927C200029084C /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				7691CAE420927C200029084C /* Meshing */,
+				7691CAED20927C200029084C /* MeshingTests */,
+				762D5AC022162C0B0066EE74 /* meshing */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		7691CAE320927C200029084C /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		7691CAEC20927C200029084C /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				76F402CF20A285DF002FE190 /* app_scan.jpg in Resources */,
+				76EBE55D20A2769C0052126A /* app_scan.ply in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		762D5ABD22162C0B0066EE74 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				762D5AD5221630A00066EE74 /* ExecuteEntryFunctions.cpp in Sources */,
+				762D5B27221638900066EE74 /* PlyFile.cpp in Sources */,
+				762D5AD4221630910066EE74 /* MeshingOperation.mm in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		7691CAE020927C200029084C /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				76734F5B22127434008CFD95 /* gzio.c in Sources */,
+				76734ED5221273D9008CFD95 /* jcmaster.cpp in Sources */,
+				76734F14221273F8008CFD95 /* pngerror.c in Sources */,
+				76734ED3221273D9008CFD95 /* jcphuff.cpp in Sources */,
+				76734F1A221273F8008CFD95 /* pngvcrd.c in Sources */,
+				76734F4722127434008CFD95 /* compress.c in Sources */,
+				76734F27221273F8008CFD95 /* pngset.c in Sources */,
+				76734EEA221273D9008CFD95 /* jfdctflt.cpp in Sources */,
+				76734EEF221273D9008CFD95 /* jccoefct.cpp in Sources */,
+				76734EC5221273D9008CFD95 /* jcprepct.cpp in Sources */,
+				76734F5122127434008CFD95 /* zutil.c in Sources */,
+				76734EC2221273D9008CFD95 /* jdhuff.cpp in Sources */,
+				7691CB1320927E6B0029084C /* MeshingOperation.mm in Sources */,
+				76734F5022127434008CFD95 /* crc32.c in Sources */,
+				76734ED2221273D9008CFD95 /* jmemnobs.cpp in Sources */,
+				76734F4E22127434008CFD95 /* infblock.c in Sources */,
+				76734ECD221273D9008CFD95 /* jcmainct.cpp in Sources */,
+				76734F5C22127434008CFD95 /* inffast.c in Sources */,
+				76734ECC221273D9008CFD95 /* jcapistd.cpp in Sources */,
+				76734ECB221273D9008CFD95 /* jdmaster.cpp in Sources */,
+				76734F1E221273F8008CFD95 /* pnggccrd.c in Sources */,
+				76734EC9221273D9008CFD95 /* jdmainct.cpp in Sources */,
+				76734EC1221273D9008CFD95 /* jmemmgr.cpp in Sources */,
+				76734F5822127434008CFD95 /* trees.c in Sources */,
+				76734ECE221273D9008CFD95 /* jdpostct.cpp in Sources */,
+				76734ED4221273D9008CFD95 /* jdapistd.cpp in Sources */,
+				76734EC6221273D9008CFD95 /* jidctred.cpp in Sources */,
+				76734F20221273F8008CFD95 /* pngpread.c in Sources */,
+				76734EF6221273D9008CFD95 /* jddctmgr.cpp in Sources */,
+				76734EF9221273D9008CFD95 /* jcmarker.cpp in Sources */,
+				76734F1C221273F8008CFD95 /* pngwutil.c in Sources */,
+				76734ECA221273D9008CFD95 /* jdphuff.cpp in Sources */,
+				76734EE7221273D9008CFD95 /* jdcoefct.cpp in Sources */,
+				76734F15221273F8008CFD95 /* pngwio.c in Sources */,
+				7619BF52220E243D003B2F5C /* PlyFile.cpp in Sources */,
+				76734F22221273F8008CFD95 /* pngmem.c in Sources */,
+				76734F5922127434008CFD95 /* infcodes.c in Sources */,
+				762D5A942214ACBA0066EE74 /* ExecuteEntryFunctions.cpp in Sources */,
+				76734EC8221273D9008CFD95 /* jidctflt.cpp in Sources */,
+				76734F1D221273F8008CFD95 /* pngwtran.c in Sources */,
+				76734EF2221273D9008CFD95 /* jdmerge.cpp in Sources */,
+				76734F24221273F8008CFD95 /* png.c in Sources */,
+				76734EF3221273D9008CFD95 /* jerror.cpp in Sources */,
+				76734ECF221273D9008CFD95 /* jdatadst.cpp in Sources */,
+				76734ED7221273D9008CFD95 /* jdapimin.cpp in Sources */,
+				76734EE5221273D9008CFD95 /* jchuff.cpp in Sources */,
+				76734EE4221273D9008CFD95 /* jdmarker.cpp in Sources */,
+				76734F5522127434008CFD95 /* inftrees.c in Sources */,
+				76734EDB221273D9008CFD95 /* jquant1.cpp in Sources */,
+				76734F16221273F8008CFD95 /* pngtrans.c in Sources */,
+				76734EE0221273D9008CFD95 /* jutils.cpp in Sources */,
+				76734F13221273F8008CFD95 /* pngrio.c in Sources */,
+				76734F21221273F8008CFD95 /* pngread.c in Sources */,
+				76734EF1221273D9008CFD95 /* jcinit.cpp in Sources */,
+				76734EDD221273D9008CFD95 /* jctrans.cpp in Sources */,
+				76734F25221273F8008CFD95 /* pngrtran.c in Sources */,
+				76734EF4221273D9008CFD95 /* jdcolor.cpp in Sources */,
+				76734EF5221273D9008CFD95 /* jdtrans.cpp in Sources */,
+				76734ED9221273D9008CFD95 /* jidctfst.cpp in Sources */,
+				76734EE2221273D9008CFD95 /* jdinput.cpp in Sources */,
+				76734EDC221273D9008CFD95 /* jccolor.cpp in Sources */,
+				76734F5622127434008CFD95 /* uncompr.c in Sources */,
+				76734EEE221273D9008CFD95 /* jfdctint.cpp in Sources */,
+				76734ED8221273D9008CFD95 /* jcparam.cpp in Sources */,
+				76734EE3221273D9008CFD95 /* jcdctmgr.cpp in Sources */,
+				76734F26221273F8008CFD95 /* pngrutil.c in Sources */,
+				76734ED1221273D9008CFD95 /* jcomapi.cpp in Sources */,
+				76734EF7221273D9008CFD95 /* jfdctfst.cpp in Sources */,
+				76734F4922127434008CFD95 /* infutil.c in Sources */,
+				76734EDA221273D9008CFD95 /* jquant2.cpp in Sources */,
+				76734EEC221273D9008CFD95 /* jcsample.cpp in Sources */,
+				76734F4622127434008CFD95 /* inflate.c in Sources */,
+				76734EF0221273D9008CFD95 /* jdsample.cpp in Sources */,
+				76734ED6221273D9008CFD95 /* jidctint.cpp in Sources */,
+				76734F4822127434008CFD95 /* deflate.c in Sources */,
+				76734EE1221273D9008CFD95 /* jdatasrc.cpp in Sources */,
+				76734F18221273F8008CFD95 /* pngwrite.c in Sources */,
+				76734EC4221273D9008CFD95 /* jcapimin.cpp in Sources */,
+				76734F23221273F8008CFD95 /* pngget.c in Sources */,
+				76734F5D22127434008CFD95 /* adler32.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		7691CAEA20927C200029084C /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				7691CAF420927C200029084C /* MeshingTests.m in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		7691CAF120927C200029084C /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 7691CAE420927C200029084C /* Meshing */;
+			targetProxy = 7691CAF020927C200029084C /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		762D5AC722162C0B0066EE74 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				BITCODE_GENERATION_MODE = bitcode;
+				CLANG_ENABLE_MODULES = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				OTHER_LDFLAGS = "-ObjC";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = iphoneos;
+				SKIP_INSTALL = YES;
+				SUPPORTED_PLATFORMS = "iphonesimulator iphoneos macosx";
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALID_ARCHS = "arm64 x86_64";
+			};
+			name = Debug;
+		};
+		762D5AC822162C0B0066EE74 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				BITCODE_GENERATION_MODE = bitcode;
+				CLANG_ENABLE_MODULES = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				OTHER_LDFLAGS = "-ObjC";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = iphoneos;
+				SKIP_INSTALL = YES;
+				SUPPORTED_PLATFORMS = "iphonesimulator iphoneos macosx";
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALID_ARCHS = "arm64 x86_64";
+			};
+			name = Release;
+		};
+		7691CAF720927C200029084C /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "c++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = NO;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = NO;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_STYLE = Manual;
+				COMPILER_INDEX_STORE_ENABLE = NO;
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
+				"GCC_WARN_64_TO_32_BIT_CONVERSION[arch=*64]" = NO;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_LABEL = YES;
+				GCC_WARN_UNUSED_VALUE = NO;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
+				MACOSX_DEPLOYMENT_TARGET = 10.14;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = NO;
+				SDKROOT = macosx;
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Debug;
+		};
+		7691CAF820927C200029084C /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "c++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = NO;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = NO;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_STYLE = Manual;
+				COMPILER_INDEX_STORE_ENABLE = NO;
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
+				"GCC_WARN_64_TO_32_BIT_CONVERSION[arch=*64]" = NO;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_LABEL = YES;
+				GCC_WARN_UNUSED_VALUE = NO;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
+				MACOSX_DEPLOYMENT_TARGET = 10.14;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = macosx;
+				VALIDATE_PRODUCT = YES;
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Release;
+		};
+		7691CAFA20927C200029084C /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				BITCODE_GENERATION_MODE = bitcode;
+				CLANG_ANALYZER_NONNULL = NO;
+				DEFINES_MODULE = YES;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				ENABLE_BITCODE = YES;
+				HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/..";
+				INFOPLIST_FILE = Meshing/Info.plist;
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.standardcyborg.PoissonRecon;
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SUPPORTED_PLATFORMS = "iphonesimulator iphoneos macosx";
+				SYSTEM_HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../JPEG";
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALID_ARCHS = "arm64 arm64e armv7 armv7s x86_64";
+			};
+			name = Debug;
+		};
+		7691CAFB20927C200029084C /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				BITCODE_GENERATION_MODE = bitcode;
+				CLANG_ANALYZER_NONNULL = NO;
+				DEFINES_MODULE = YES;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				ENABLE_BITCODE = YES;
+				HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/..";
+				INFOPLIST_FILE = Meshing/Info.plist;
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.standardcyborg.PoissonRecon;
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SUPPORTED_PLATFORMS = "iphonesimulator iphoneos macosx";
+				SYSTEM_HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../JPEG";
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALID_ARCHS = "arm64 arm64e armv7 armv7s x86_64";
+			};
+			name = Release;
+		};
+		7691CAFD20927C200029084C /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = KAW83335BG;
+				GCC_FAST_MATH = YES;
+				INFOPLIST_FILE = MeshingTests/Info.plist;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.standardcyborg.MeshingTests;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		7691CAFE20927C200029084C /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				"CODE_SIGN_IDENTITY[sdk=macosx*]" = "-";
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = KAW83335BG;
+				GCC_FAST_MATH = YES;
+				GCC_OPTIMIZATION_LEVEL = fast;
+				INFOPLIST_FILE = MeshingTests/Info.plist;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.standardcyborg.MeshingTests;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		762D5AC922162C0B0066EE74 /* Build configuration list for PBXNativeTarget "meshing" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				762D5AC722162C0B0066EE74 /* Debug */,
+				762D5AC822162C0B0066EE74 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		7691CADF20927C200029084C /* Build configuration list for PBXProject "PoissonRecon" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				7691CAF720927C200029084C /* Debug */,
+				7691CAF820927C200029084C /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		7691CAF920927C200029084C /* Build configuration list for PBXNativeTarget "Meshing" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				7691CAFA20927C200029084C /* Debug */,
+				7691CAFB20927C200029084C /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		7691CAFC20927C200029084C /* Build configuration list for PBXNativeTarget "MeshingTests" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				7691CAFD20927C200029084C /* Debug */,
+				7691CAFE20927C200029084C /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 7691CADC20927C200029084C /* Project object */;
+}
diff --git a/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 0000000..3b11d6e
--- /dev/null
+++ b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:PoissonRecon.xcodeproj">
+   </FileRef>
+</Workspace>
diff --git a/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
new file mode 100644
index 0000000..18d9810
--- /dev/null
+++ b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
diff --git a/PoissonRecon-Xcode/PoissonRecon.xcodeproj/xcshareddata/xcschemes/libmeshing.xcscheme b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/xcshareddata/xcschemes/libmeshing.xcscheme
new file mode 100644
index 0000000..7128f5b
--- /dev/null
+++ b/PoissonRecon-Xcode/PoissonRecon.xcodeproj/xcshareddata/xcschemes/libmeshing.xcscheme
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "1020"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "762D5AC022162C0B0066EE74"
+               BuildableName = "libmeshing.a"
+               BlueprintName = "meshing"
+               ReferencedContainer = "container:PoissonRecon.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+         <TestableReference
+            skipped = "NO">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "7691CAED20927C200029084C"
+               BuildableName = "MeshingTests.xctest"
+               BlueprintName = "MeshingTests"
+               ReferencedContainer = "container:PoissonRecon.xcodeproj">
+            </BuildableReference>
+         </TestableReference>
+      </Testables>
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "762D5AC022162C0B0066EE74"
+            BuildableName = "libmeshing.a"
+            BlueprintName = "meshing"
+            ReferencedContainer = "container:PoissonRecon.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Release"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "762D5AC022162C0B0066EE74"
+            BuildableName = "libmeshing.a"
+            BlueprintName = "meshing"
+            ReferencedContainer = "container:PoissonRecon.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "762D5AC022162C0B0066EE74"
+            BuildableName = "libmeshing.a"
+            BlueprintName = "meshing"
+            ReferencedContainer = "container:PoissonRecon.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
diff --git a/PoissonRecon.sln b/PoissonRecon.sln
deleted file mode 100644
index ed7a3b4..0000000
--- a/PoissonRecon.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 14
-VisualStudioVersion = 14.0.25420.1
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PoissonRecon", "PoissonRecon.vcxproj", "{46F87D0E-C53A-4F95-AB48-A5DBA8014340}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SurfaceTrimmer", "SurfaceTrimmer.vcxproj", "{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SSDRecon", "SSDRecon.vcxproj", "{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|x64 = Debug|x64
-		Release|x64 = Release|x64
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Debug|x64.ActiveCfg = Debug|x64
-		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Debug|x64.Build.0 = Debug|x64
-		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x64.ActiveCfg = Release|x64
-		{46F87D0E-C53A-4F95-AB48-A5DBA8014340}.Release|x64.Build.0 = Release|x64
-		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Debug|x64.ActiveCfg = Debug|x64
-		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Debug|x64.Build.0 = Debug|x64
-		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x64.ActiveCfg = Release|x64
-		{99BEAFED-8DB9-4B7D-A0BE-5186158193FE}.Release|x64.Build.0 = Release|x64
-		{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}.Debug|x64.ActiveCfg = Debug|x64
-		{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}.Debug|x64.Build.0 = Debug|x64
-		{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}.Release|x64.ActiveCfg = Release|x64
-		{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}.Release|x64.Build.0 = Release|x64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/PoissonRecon.vcxproj b/PoissonRecon.vcxproj
index e5d1740..b5acc2c 100644
--- a/PoissonRecon.vcxproj
+++ b/PoissonRecon.vcxproj
@@ -1,5 +1,5 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
@@ -23,31 +23,30 @@
     <ProjectGuid>{46F87D0E-C53A-4F95-AB48-A5DBA8014340}</ProjectGuid>
     <RootNamespace>PoissonRecon</RootNamespace>
     <Keyword>Win32Proj</Keyword>
-    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -71,7 +70,7 @@
     <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)\Obj\$(Platform)\$(Configuration)\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
     <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Obj\$(Platform)\$(Configuration)\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
     <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)Bin\$(Platform)\$(Configuration)\</OutDir>
     <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
@@ -116,7 +115,6 @@
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -156,7 +154,7 @@
       <TargetEnvironment>X64</TargetEnvironment>
     </Midl>
     <ClCompile>
-      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <PrecompiledHeader>
@@ -167,6 +165,8 @@
       <OpenMPSupport>true</OpenMPSupport>
       <IntrinsicFunctions>false</IntrinsicFunctions>
       <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <AdditionalOptions>
+      </AdditionalOptions>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -177,58 +177,17 @@
       <DataExecutionPrevention>
       </DataExecutionPrevention>
       <TargetMachine>MachineX64</TargetMachine>
-      <AdditionalDependencies>psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>ZLIB.lib;JPEG.lib;PNG.lib;psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <IgnoreSpecificDefaultLibraries>
       </IgnoreSpecificDefaultLibraries>
       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp" />
-    <ClCompile Include="Src\Factor.cpp" />
-    <ClCompile Include="Src\Geometry.cpp" />
-    <ClCompile Include="Src\MarchingCubes.cpp" />
     <ClCompile Include="Src\PlyFile.cpp" />
     <ClCompile Include="Src\PoissonRecon.cpp" />
   </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\Allocator.h" />
-    <ClInclude Include="Src\Array.h" />
-    <ClInclude Include="Src\BinaryNode.h" />
-    <ClInclude Include="Src\BSplineData.h" />
-    <ClInclude Include="Src\CmdLineParser.h" />
-    <ClInclude Include="Src\Factor.h" />
-    <ClInclude Include="Src\Geometry.h" />
-    <ClInclude Include="Src\MarchingCubes.h" />
-    <ClInclude Include="Src\MAT.h" />
-    <ClInclude Include="Src\MemoryUsage.h" />
-    <ClInclude Include="Src\MultiGridOctreeData.h" />
-    <ClInclude Include="Src\MyTime.h" />
-    <ClInclude Include="Src\Octree.h" />
-    <ClInclude Include="Src\Ply.h" />
-    <ClInclude Include="Src\PointStream.h" />
-    <ClInclude Include="Src\Polynomial.h" />
-    <ClInclude Include="Src\PPolynomial.h" />
-    <ClInclude Include="Src\SparseMatrix.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\Array.inl" />
-    <None Include="Src\CmdLineParser.inl" />
-    <None Include="Src\MAT.inl" />
-    <None Include="Src\MultiGridOctreeData.Evaluation.inl" />
-    <None Include="Src\MultiGridOctreeData.IsoSurface.inl" />
-    <None Include="Src\MultiGridOctreeData.SortedTreeNodes.inl" />
-    <None Include="Src\MultiGridOctreeData.System.inl" />
-    <None Include="Src\MultiGridOctreeData.WeightedSamples.inl" />
-    <None Include="Src\PointStream.inl" />
-    <None Include="Src\BSplineData.inl" />
-    <None Include="Src\Geometry.inl" />
-    <None Include="Src\MultiGridOctreeData.inl" />
-    <None Include="Src\Octree.inl" />
-    <None Include="Src\Polynomial.inl" />
-    <None Include="Src\PPolynomial.inl" />
-    <None Include="Src\SparseMatrix.inl" />
-  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/PoissonRecon.vcxproj.filters b/PoissonRecon.vcxproj.filters
deleted file mode 100644
index 16466a4..0000000
--- a/PoissonRecon.vcxproj.filters
+++ /dev/null
@@ -1,143 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Include Files">
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
-      <Extensions>inc;inl</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\Factor.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\Geometry.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\MarchingCubes.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\PoissonRecon.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\PlyFile.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\Allocator.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\BinaryNode.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\BSplineData.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\CmdLineParser.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Factor.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Geometry.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MarchingCubes.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MemoryUsage.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MultiGridOctreeData.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Octree.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\PointStream.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Polynomial.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\PPolynomial.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\SparseMatrix.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MAT.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Array.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Ply.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MyTime.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\BSplineData.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Geometry.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Octree.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Polynomial.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\PPolynomial.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\SparseMatrix.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\PointStream.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\CmdLineParser.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MAT.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Array.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.SortedTreeNodes.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.IsoSurface.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.Evaluation.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.WeightedSamples.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.System.inl">
-      <Filter>Include Files</Filter>
-    </None>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/README.md b/README.md
index 482b974..738034d 100644
--- a/README.md
+++ b/README.md
@@ -1,55 +1,80 @@
-<CENTER><H2>Screened Poisson Surface Reconstruction <BR>(and Smoothed Signed Distance Reconstruction)<BR>Version 9.01</H2></CENTER>
-<CENTER>
-<A HREF="#LINKS">links</A>
-<A HREF="#EXECUTABLES">executables</A>
-<A HREF="#USAGE">usage</A>
-<A HREF="#CHANGES">changes</A>
-<A HREF="#SUPPORT">support</A>
-</CENTER>
-<HR>
-<A NAME="LINKS"><B>LINKS</B></A><br>
+<center><h2>Adaptive Multigrid Solvers (Version 10.07)</h2></center>
+<center>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/index.html#LINKS">links</a>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/index.html#EXECUTABLES">executables</a>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/index.html#USAGE">usage</a>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/index.html#CHANGES">changes</a>
+<!--
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/index.html#SUPPORT">support</a>
+-->
+</center>
+<hr>
+This code-base was born from the Poisson Surface Reconstruction code. It has evolved to support more general adaptive finite-elements systems:
 <UL>
-<B>Papers:</B>
-<A href="http://www.cs.jhu.edu/~misha/MyPapers/SGP06.pdf">[Kazhdan, Bolitho, and Hoppe, 2006]</A>,
-<A href="http://mesh.brown.edu/ssd/paper.html">[Calakli and Taubin, 2011]</A>,
-<A href="http://www.cs.jhu.edu/~misha/MyPapers/ToG13.pdf">[Kazhdan and Hoppe, 2013]</A>
-<br>
-<B>Executables: </B>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/PoissonRecon.x64.zip">Win64</A><BR>
-<B>Source Code:</B>
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/PoissonRecon.zip">ZIP</A> <A HREF="https://github.com/mkazhdan/PoissonRecon">GitHub</A><BR>
-<B>Older Versions:</B>
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.0/">V9.0</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version8.0/">V8.0</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version7.0/">V7.0</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13a/">V6.13a</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13/">V6.13</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.12/">V6.12</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.11/">V6.11</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.1/">V6.1</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6/">V6</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.71/">V5.71</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.6/">V5.6</A>,
-<A href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.5a/">V5.5a</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.1/">V5.1</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5/">V5</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.51/">V4.51</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">V4.5</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4/">V4</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version3/">V3</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version2/">V2</A>,
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version1/">V1</A>
+<LI> in spaces of arbitrary dimension,
+<LI> discretized using finite-elements of arbitrary degree,
+<LI> involving arbitrary function derivatives,
+<LI> with both point-wise and integrated constraints.
 </UL>
-<HR>
-<A NAME="EXECUTABLES"><B>EXECUTABLES</B></A><BR>
-<UL>
-<DL>
-<FONT SIZE="+1" ><B><A HREF="" ONCLICK="toggleAll( $('poisson_recon') ) ; return false;" >PoissonRecon</A></B></FONT>
-<DIV ID="poisson_recon">
-<DT><b>--in</b> &#60;<i>input points</i>&#62;
-<DD> This string is the name of the file from which the point set will be read.<br>
+<hr>
+<a name="LINKS"><b>LINKS</b></a><br>
+<ul>
+<b>Papers:</b>
+<a href="http://www.cs.jhu.edu/~misha/MyPapers/SGP06.pdf">[Kazhdan, Bolitho, and Hoppe, 2006]</a>,
+<a href="http://www.agarwala.org/efficient_gdc/">[Agarwala, 2007]</a>,
+<a href="http://mesh.brown.edu/ssd/paper.html">[Calakli and Taubin, 2011]</a>,
+<A HREF="https://www.cs.cmu.edu/~kmcrane/Projects/HeatMethod/">[Crane, Weischedel, and Wardetzky, 2013]</a>,
+<a href="http://www.cs.jhu.edu/~misha/MyPapers/ToG13.pdf">[Kazhdan and Hoppe, 2013]</a>
+<br>
+<b>Executables: </b>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.07/AdaptiveSolvers.x64.zip">Win64</a><br>
+<b>Source Code:</b>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.07/AdaptiveSolvers.zip">ZIP</a> <a href="https://github.com/mkazhdan/PoissonRecon">GitHub</a><br>
+<b>Older Versions:</b>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.06/">V10.06</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/">V10.05</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.04/">V10.04</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.03/">V10.03</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.02/">V10.02</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.01/">V10.01</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.00/">V10.00</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.011/">V9.011</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/">V9.01</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.0/">V9.0</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version8.0/">V8.0</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version7.0/">V7.0</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13a/">V6.13a</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13/">V6.13</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.12/">V6.12</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.11/">V6.11</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.1/">V6.1</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6/">V6</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.71/">V5.71</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.6/">V5.6</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.5a/">V5.5a</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.1/">V5.1</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5/">V5</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.51/">V4.51</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">V4.5</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4/">V4</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version3/">V3</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version2/">V2</a>,
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version1/">V1</a>
+</ul>
+<hr>
+<a name="EXECUTABLES"><b>EXECUTABLES</b></a><br>
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>PoissonRecon</b></font>:
+Reconstructs a triangle mesh from a set of oriented 3D points by solving a Poisson system (solving a 3D Laplacian system with positional value constraints) <a href="http://www.cs.jhu.edu/~misha/MyPapers/SGP06.pdf">[Kazhdan, Bolitho, and Hoppe, 2006]</a>,
+<a href="http://www.cs.jhu.edu/~misha/MyPapers/ToG13.pdf">[Kazhdan and Hoppe, 2013]</a>
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input points</i>&gt;
+</dt><dd> This string is the name of the file from which the point set will be read.<br>
 If the file extension is <i>.ply</i>, the file should be in
-<A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format, giving the list of oriented
+<a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format, giving the list of oriented
 vertices with the x-, y-, and z-coordinates of the positions encoded by the properties <i>x</i>, <i>y</i>, and
 <i>z</i> and the x-, y-, and z-coordinates of the normals encoded by the properties <i>nx</i>, <i>ny</i>, and
 <i>nz</i> .<br>
@@ -60,119 +85,130 @@ Otherwise, the file should be an ascii file with groups of 6,
 white space delimited, numbers: x-, y-, and z-coordinates of the point's position, followed
 by the x-, y- and z-coordinates of the point's normal. (No information about the number of oriented point samples should be specified.)<br> 
 
-<DT>[<b>--out</b> &#60;<i>output triangle mesh</i>&#62;]
-<DD> This string is the name of the file to which the triangle mesh will be written. 
-The file is written in <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format.
+</dd><dt>[<b>--out</b> &lt;<i>output triangle mesh</i>&gt;]
+</dt><dd> This string is the name of the file to which the triangle mesh will be written. 
+The file is written in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format.
 
-<DT>[<b>--voxel</b> &#60;<i>output voxel grid</i>&#62;]
-<DD> This string is the name of the file to which the sampled implicit function will be written.
-The filw is wrtten out in binary, with the first 4 bytes corresponding to the (integer) sampling resolution, 2^<i>d</i>,
-and the next 4 x 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i> bytes corresponding to the (single precision) floating point values
+</dd><dt>[<b>--tree</b> &lt;<i>output octree and coefficients</i>&gt;]
+</dt><dd> This string is the name of the file to which the octree and solution coefficients are to be written.
+
+</dd><dt>[<b>--grid</b> &lt;<i>output grid</i>&gt;]
+</dt><dd> This string is the name of the file to which the sampled implicit function will be written.
+The file is written out in binary, with the first 4 bytes corresponding to the (integer) sampling resolution, 2^<i>d</i>,
+and the next 4 x 2^<i>d</i> x 2^<i>d</i> x ... bytes corresponding to the (single precision) floating point values
 of the implicit function.
 
-<DT>[<b>--degree</b> &#60;<i>B-spline degree</i>&#62;]
-<DD> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
-Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<BR>
+</dd><dt>[<b>--degree</b> &lt;<i>B-spline degree</i>&gt;]
+</dt><dd> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
+Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<br>
 The default value for this parameter is 2.
 
-<DT>[<b>--bType</b> &#60;<i>boundary type</i>&#62;]
-<DD> This integer specifies the boundary type for the finite elements. Valid values are:
-<UL>
-<LI> <B>1</B>: Free boundary constraints
-<LI> <B>2</B>: Dirichlet boundary constraints
-<LI> <B>3</B>: Neumann boundary constraints
-</UL>
+</dd><dt>[<b>--bType</b> &lt;<i>boundary type</i>&gt;]
+</dt><dd> This integer specifies the boundary type for the finite elements. Valid values are:
+<ul>
+<li> <b>1</b>: Free boundary constraints
+</li><li> <b>2</b>: Dirichlet boundary constraints
+</li><li> <b>3</b>: Neumann boundary constraints
+</li></ul>
 The default value for this parameter is 3 (Neumann).
 
-<DT>[<b>--depth</b> &#60;<i>reconstruction depth</i>&#62;]
-<DD> This integer is the maximum depth of the tree that will be used for surface reconstruction.
-Running at depth <i>d</i> corresponds to solving on a voxel grid whose resolution is no larger than
-2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i>. Note that since the reconstructor adapts the octree to the
+</dd><dt>[<b>--depth</b> &lt;<i>reconstruction depth</i>&gt;]
+</dt><dd> This integer is the maximum depth of the tree that will be used for surface reconstruction.
+Running at depth <i>d</i> corresponds to solving on a grid whose resolution is no larger than
+2^<i>d</i> x 2^<i>d</i> x ... Note that since the reconstructor adapts the octree to the
 sampling density, the specified reconstruction depth is only an upper bound.<br>
 The default value for this parameter is 8.
 
-<DT>[<b>--scale</b> &#60;<i>scale factor</i>&#62;]
-<DD> This floating point value specifies the ratio between the diameter of the cube used for reconstruction
+</dd><dt>[<b>--width</b> &lt;<i>finest cell width</i>&gt;]
+</dt><dd> This floating point value specifies the target width of the finest level octree cells.<br>
+This parameter is ignored if the <B>--depth</B> is also specified.
+
+</dd><dt>[<b>--scale</b> &lt;<i>scale factor</i>&gt;]
+</dt><dd> This floating point value specifies the ratio between the diameter of the cube used for reconstruction
 and the diameter of the samples' bounding cube.<br>
 The default value is 1.1.
 
-<DT>[<b>--samplesPerNode</b> &#60;<i>minimum number of samples</i>&#62;]
-<DD> This floating point value specifies the minimum number of sample points that should fall within an
+</dd><dt>[<b>--samplesPerNode</b> &lt;<i>minimum number of samples</i>&gt;]
+</dt><dd> This floating point value specifies the minimum number of sample points that should fall within an
 octree node as the octree construction is adapted to sampling density. For noise-free samples, small values
 in the range [1.0 - 5.0] can be used. For more noisy samples, larger values in the range [15.0 - 20.0] may
 be needed to provide a smoother, noise-reduced, reconstruction.<br>
 The default value is 1.0.
 
-<DT>[<b>--pointWeight</b> &#60;<i>interpolation weight</i>&#62;]
-<DD> This floating point value specifies the importance that interpolation of the point samples
+</dd><dt>[<b>--pointWeight</b> &lt;<i>interpolation weight</i>&gt;]
+</dt><dd> This floating point value specifies the importance that interpolation of the point samples
 is given in the formulation of the screened Poisson equation.<br>
 The results of the original (unscreened) Poisson Reconstruction can be obtained by setting this value to 0.<br>
-The default value for this parameter is 4.
+The default value for this parameter is twice the B-spline degree.
 
-<DT>[<b>--confidence</b>]
-<DD> Enabling this flag tells the reconstructor to use the size of the normals as confidence information. When the flag
-is not enabled, all normals are normalized to have unit-length prior to reconstruction.
+</dd><dt>[<b>--iters</b> &lt;<i>Gauss-Seidel iterations per level</i>&gt;]
+</dt><dd> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the octree hierarchy.<br>
+The default value for this parameter is 8.
 
-<DT>[<b>--nWeights</b>]
-<DD> Enabling this flag tells the reconstructor to use the size of the normals to modulate the interpolation weights. When the flag
-is not enabled, all points are given the same weight.
+</dd><dt>[<b>--density</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output the estimated depth values of the iso-surface vertices.
 
-<DT>[<b>--iters</b> &#60;<i>GS iters</i>&#62;]
-<DD> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the hiearchy.<br>
-The default value for this parameter is 8.
+</dd><dt>[<b>--normals</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output vertex normals, computed from the gradients of the implicit function.
 
-<DT>[<b>--cgDepth</b> &#60;<i>conjugate gradients solver depth</i>&#62;]
-<DD> This integer is the depth up to which a conjugate-gradients solver will be used to solve the linear system. Beyond this depth Gauss-Seidel relaxation will be used.<br>
-The default value for this parameter is 0.
+</dd><dt>[<b>--colors</b>]
+</dt><dd> Enabling this flag tells the reconstructor to read in color values with the input points and extrapolate those to the vertices of the output.
 
-<DT>[<b>--fullDepth</b> &#60;<i>adaptive octree depth</i>&#62;]
-<DD> This integer specifies the depth beyond depth the octree will be adapted.
-At coarser depths, the octree will be complete, containing all 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i> nodes.<br>
-The default value for this parameter is 5.
+</dd><dt>[<b>--data</b> &lt;<i>pull factor</i>&gt;]
+</dt><dd> If <B>--colors</B> is specified, this floating point value specifies the relative importance
+of finer color estimates over lower ones.<BR>
+The default value for this parameter is 32.
 
-<DT>[<b>--voxelDepth</b> &#60;<i>voxel sampling depth</i>&#62;]
-<DD> This integer is the depth of the regular grid over which the implicit function is to be sampled.
-Running at depth <i>d</i> corresponds to sampling on a voxel grid whose resolution is 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i>.<br>
-The default value for this parameter is the value of the <B>--depth</B> parameter.
+</dd><dt>[<b>--confidence</b> &lt;<i>normal confidence exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent to be applied to a point's confidence to adjust its weight. (A point's confidence is defined by the magnitude of its normal.)<BR>
+The default value for this parameter is 0.
 
-<DT>[<b>--primalVoxel</b>]
-<DD> Enabling this flag when outputing to a voxel file has the reconstructor sample the implicit function at the corners of the grid, rather than the centers of the cells.
+</dd><dt>[<b>--confidenceBias</b> &lt;<i>normal confidence bias exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent to be applied to a point's confidence to bias the resolution at which the sample contributes to the linear system. (Points with lower confidence are biased to contribute at coarser resolutions.)<BR>
+The default value for this parameter is 0.
 
-<DT>[<b>--color</b> &#60;<i>pull factor</i>&#62;]
-<DD> If specified, the reconstruction code assumes that the input is equipped with colors and will extrapolate
-the color values to the vertices of the reconstructed mesh. The floating point value specifies the relative importance
-of finer color estimates over lower ones. (In practice, we have found that a pull factor of 16 works well.)
+</dd><dt>[<b>--primalGrid</b>]
+</dt><dd> Enabling this flag when outputting to a grid file has the reconstructor sample the implicit function at the corners of the grid, rather than the centers of the cells.
 
-<DT>[<b>--density</b>]
-<DD> Enabling this flag tells the reconstructor to output the estimated depth values of the iso-surface vertices.
+</dd><dt>[<b>--linearFit</b>]
+</dt><dd> Enabling this flag has the reconstructor use linear interpolation to estimate the positions of iso-vertices.
 
-<DT>[<b>--linearFit</b>]
-<DD> Enabling this flag has the reconstructor use linear interpolation to estimate the positions of iso-vertices.
+</dd><dt>[<b>--polygonMesh</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output a polygon mesh (rather than triangulating the results of Marching Cubes).
 
-<DT>[<b>--polygonMesh</b>]
-<DD> Enabling this flag tells the reconstructor to output a polygon mesh (rather than triangulating the results of Marching Cubes).
+</dd><dt>[<b>--tempDir</b> &lt;<i>temporary output directory</i>&gt;]
+</dt><dd> This string is the name of the directory to which temporary files will be written.
 
-<DT>[<b>--threads</b> &#60;<i>number of processing threads</i>&#62;]
-<DD> This integer specifies the number of threads across which the reconstruction
-algorithm should be parallelized.<br>
+</dd><dt>[<b>--threads</b> &lt;<i>number of processing threads</i>&gt;]
+</dt><dd> This integer specifies the number of threads across which the algorithm should be parallelized.<br>
 The default value for this parameter is equal to the numer of (virtual) processors on the executing  machine.
 
-<DT>[<b>--verbose</b>]
-<DD> Enabling this flag provides a more verbose description of the running times and memory usages of
-individual components of the surface reconstructor.
-</DIV>
-</DL>
-</UL>
+</dd><dt>[<b>--maxMemory</b> &lt;<i>maximum memory usage (in GB)</i>&gt;]
+</dt><dd> If positive, this integer value specifies the peak memory utilization for running the reconstruction code (forcing the execution to terminate if the limit is exceeded).
 
+</dd><dt>[<b>--performance</b>]
+</dt><dd> Enabling this flag provides running time and peak memory usage at the end of the execution.
 
-<UL>
-<DL>
-<FONT SIZE="+1"><B><A HREF="" ONCLICK="toggleAll( $('ssd_recon') ) ; return false;">SSDRecon</A></B></FONT>
-<DIV ID="ssd_recon">
-<DT><b>--in</b> &#60;<i>input points</i>&#62;
-<DD> This string is the name of the file from which the point set will be read.<br>
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of individual components of the surface reconstructor.
+
+</dd>
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>SSDRecon</b></font>:
+Reconstructs a surface mesh from a set of oriented 3D points by solving for a Smooth Signed Distance function (solving a 3D bi-Laplacian system with positional value and gradient constraints) <a href="http://mesh.brown.edu/ssd/paper.html">[Calakli and Taubin, 2011]</a>
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input points</i>&gt;
+</dt><dd> This string is the name of the file from which the point set will be read.<br>
 If the file extension is <i>.ply</i>, the file should be in
-<A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format, giving the list of oriented
+<a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format, giving the list of oriented
 vertices with the x-, y-, and z-coordinates of the positions encoded by the properties <i>x</i>, <i>y</i>, and
 <i>z</i> and the x-, y-, and z-coordinates of the normals encoded by the properties <i>nx</i>, <i>ny</i>, and
 <i>nz</i> .<br>
@@ -183,328 +219,605 @@ Otherwise, the file should be an ascii file with groups of 6,
 white space delimited, numbers: x-, y-, and z-coordinates of the point's position, followed
 by the x-, y- and z-coordinates of the point's normal. (No information about the number of oriented point samples should be specified.)<br> 
 
-<DT>[<b>--out</b> &#60;<i>output triangle mesh</i>&#62;]
-<DD> This string is the name of the file to which the triangle mesh will be written. 
-The file is written in <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format.
+</dd><dt>[<b>--out</b> &lt;<i>output triangle mesh</i>&gt;]
+</dt><dd> This string is the name of the file to which the triangle mesh will be written. 
+The file is written in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format.
+
+</dd><dt>[<b>--tree</b> &lt;<i>output octree and coefficients</i>&gt;]
+</dt><dd> This string is the name of the file to which the octree and solution coefficients are to be written.
 
-<DT>[<b>--voxel</b> &#60;<i>output voxel grid</i>&#62;]
-<DD> This string is the name of the file to which the sampled implicit function will be written.
-The filw is wrtten out in binary, with the first 4 bytes corresponding to the (integer) sampling resolution, 2^<i>d</i>,
-and the next 4 x 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i> bytes corresponding to the (single precision) floating point values
+</dd><dt>[<b>--grid</b> &lt;<i>output grid</i>&gt;]
+</dt><dd> This string is the name of the file to which the sampled implicit function will be written.
+The file is written out in binary, with the first 4 bytes corresponding to the (integer) sampling resolution, 2^<i>d</i>,
+and the next 4 x 2^<i>d</i> x 2^<i>d</i> x ... bytes corresponding to the (single precision) floating point values
 of the implicit function.
 
-<DT>[<b>--degree</b> &#60;<i>B-spline degree</i>&#62;]
-<DD> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
-Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<BR>
+</dd><dt>[<b>--degree</b> &lt;<i>B-spline degree</i>&gt;]
+</dt><dd> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
+Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<br>
 The default value for this parameter is 2.
 
-<DT>[<b>--depth</b> &#60;<i>reconstruction depth</i>&#62;]
-<DD> This integer is the maximum depth of the tree that will be used for surface reconstruction.
-Running at depth <i>d</i> corresponds to solving on a voxel grid whose resolution is no larger than
-2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i>. Note that since the reconstructor adapts the octree to the
+</dd><dt>[<b>--depth</b> &lt;<i>reconstruction depth</i>&gt;]
+</dt><dd> This integer is the maximum depth of the tree that will be used for surface reconstruction.
+Running at depth <i>d</i> corresponds to solving on a grid whose resolution is no larger than
+2^<i>d</i> x 2^<i>d</i> x ... Note that since the reconstructor adapts the octree to the
 sampling density, the specified reconstruction depth is only an upper bound.<br>
 The default value for this parameter is 8.
 
-<DT>[<b>--scale</b> &#60;<i>scale factor</i>&#62;]
-<DD> This floating point value specifies the ratio between the diameter of the cube used for reconstruction
+</dd><dt>[<b>--width</b> &lt;<i>finest cell width</i>&gt;]
+</dt><dd> This floating point value specifies the target width of the finest level octree cells.<br>
+This parameter is ignored if the <B>--depth</B> is also specified.
+
+</dd><dt>[<b>--scale</b> &lt;<i>scale factor</i>&gt;]
+</dt><dd> This floating point value specifies the ratio between the diameter of the cube used for reconstruction
 and the diameter of the samples' bounding cube.<br>
 The default value is 1.1.
 
-<DT>[<b>--samplesPerNode</b> &#60;<i>minimum number of samples</i>&#62;]
-<DD> This floating point value specifies the minimum number of sample points that should fall within an
+</dd><dt>[<b>--samplesPerNode</b> &lt;<i>minimum number of samples</i>&gt;]
+</dt><dd> This floating point value specifies the minimum number of sample points that should fall within an
 octree node as the octree construction is adapted to sampling density. For noise-free samples, small values
 in the range [1.0 - 5.0] can be used. For more noisy samples, larger values in the range [15.0 - 20.0] may
 be needed to provide a smoother, noise-reduced, reconstruction.<br>
 The default value is 1.0.
 
-<DT>[<b>--valueWeight</b> &#60;<i>zero-crossing interpolation weight</i>&#62;]
-<DD> This floating point value specifies the importance that interpolation of the point samples
+</dd><dt>[<b>--valueWeight</b> &lt;<i>zero-crossing interpolation weight</i>&gt;]
+</dt><dd> This floating point value specifies the importance that interpolation of the point samples
 is given in the formulation of the screened Smoothed Signed Distance Reconstruction.<br>
-The default value for this parameter is 4.
+The default value for this parameter is 1.
 
-<DT>[<b>--gradientWeight</b> &#60;<i>normal interpolation weight</i>&#62;]
-<DD> This floating point value specifies the importance that interpolation of the points' normals
+</dd><dt>[<b>--gradientWeight</b> &lt;<i>normal interpolation weight</i>&gt;]
+</dt><dd> This floating point value specifies the importance that interpolation of the points' normals
 is given in the formulation of the screened Smoothed Signed Distance Reconstruction.<br>
-The default value for this parameter is 0.001.
+The default value for this parameter is 1.
 
-<DT>[<b>--biLapWeight</b> &#60;<i>bi-Laplacian weight weight</i>&#62;]
-<DD> This floating point value specifies the importance that the bi-Laplacian regularization
+</dd><dt>[<b>--biLapWeight</b> &lt;<i>bi-Laplacian weight</i>&gt;]
+</dt><dd> This floating point value specifies the importance that the bi-Laplacian regularization
 is given in the formulation of the screened Smoothed Signed Distance Reconstruction.<br>
-The default value for this parameter is 0.00001.
+The default value for this parameter is 1.
 
-<DT>[<b>--confidence</b>]
-<DD> Enabling this flag tells the reconstructor to use the size of the normals as confidence information. When the flag
-is not enabled, all normals are normalized to have unit-length prior to reconstruction.
+</dd><dt>[<b>--iters</b> &lt;<i>GS iters</i>&gt;]
+</dt><dd> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the hierarchy.<br>
+The default value for this parameter is 8.
 
-<DT>[<b>--nWeights</b>]
-<DD> Enabling this flag tells the reconstructor to use the size of the normals to modulate the interpolation weights. When the flag
-is not enabled, all points are given the same weight.
+</dd><dt>[<b>--density</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output the estimated depth values of the iso-surface vertices.
 
-<DT>[<b>--iters</b> &#60;<i>GS iters</i>&#62;]
-<DD> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the hiearchy.<br>
-The default value for this parameter is 8.
+</dd><dt>[<b>--normals</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output vertex normals, computed from the gradients of the implicit function.
 
-<DT>[<b>--cgDepth</b> &#60;<i>conjugate gradients solver depth</i>&#62;]
-<DD> This integer is the depth up to which a conjugate-gradients solver will be used to solve the linear system. Beyond this depth Gauss-Seidel relaxation will be used.<br>
-The default value for this parameter is 0.
+</dd><dt>[<b>--colors</b>]
+</dt><dd> Enabling this flag tells the reconstructor to read in color values with the input points and extrapolate those to the vertices of the output.
 
-<DT>[<b>--fullDepth</b> &#60;<i>adaptive octree depth</i>&#62;]
-<DD> This integer specifies the depth beyond depth the octree will be adapted.
-At coarser depths, the octree will be complete, containing all 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i> nodes.<br>
-The default value for this parameter is 5.
+</dd><dt>[<b>--data</b> &lt;<i>pull factor</i>&gt;]
+</dt><dd> If <B>--colors</B> is specified, this floating point value specifies the relative importance
+of finer color estimates over lower ones.<BR>
+The default value for this parameter is 32.
 
-<DT>[<b>--voxelDepth</b> &#60;<i>voxel sampling depth</i>&#62;]
-<DD> This integer is the depth of the regular grid over which the implicit function is to be sampled.
-Running at depth <i>d</i> corresponds to sampling on a voxel grid whose resolution is 2^<i>d</i> x 2^<i>d</i> x 2^<i>d</i>.<br>
-The default value for this parameter is the value of the <B>--depth</B> parameter.
+</dd><dt>[<b>--confidence</b> &lt;<i>normal confidence exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent to be applied to a point's confidence to adjust its weight. (A point's confidence is defined by the magnitude of its normal.)<BR>
+The default value for this parameter is 0.
 
-<DT>[<b>--primalVoxel</b>]
-<DD> Enabling this flag when outputing to a voxel file has the reconstructor sample the implicit function at the corners of the grid, rather than the centers of the cells.
+</dd><dt>[<b>--confidenceBias</b> &lt;<i>normal confidence bias exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent to be applied to a point's confidence to bias the resolution at which the sample contributes to the linear system. (Points with lower confidence are biased to contribute at coarser resolutions.)<BR>
+The default value for this parameter is 0.
 
-<DT>[<b>--color</b> &#60;<i>pull factor</i>&#62;]
-<DD> If specified, the reconstruction code assumes that the input is equipped with colors and will extrapolate
-the color values to the vertices of the reconstructed mesh. The floating point value specifies the relative importance
-of finer color estimates over lower ones. (In practice, we have found that a pull factor of 16 works well.)
+</dd><dt>[<b>--primalGrid</b>]
+</dt><dd> Enabling this flag when outputting to a grid file has the reconstructor sample the implicit function at the corners of the grid, rather than the centers of the cells.
 
-<DT>[<b>--density</b>]
-<DD> Enabling this flag tells the reconstructor to output the estimated depth values of the iso-surface vertices.
+</dd><dt>[<b>--nonLinearFit</b>]
+</dt><dd> Enabling this flag has the reconstructor use quadratic interpolation to estimate the positions of iso-vertices.
 
-<DT>[<b>--nonLinearFit</b>]
-<DD> Enabling this flag has the reconstructor use quadratic interpolation to estimate the positions of iso-vertices.
+</dd><dt>[<b>--polygonMesh</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output a polygon mesh (rather than triangulating the results of Marching Cubes).
 
-<DT>[<b>--polygonMesh</b>]
-<DD> Enabling this flag tells the reconstructor to output a polygon mesh (rather than triangulating the results of Marching Cubes).
+</dd><dt>[<b>--tempDir</b> &lt;<i>temporary output directory</i>&gt;]
+</dt><dd> This string is the name of the directory to which temporary files will be written.
 
-<DT>[<b>--threads</b> &#60;<i>number of processing threads</i>&#62;]
-<DD> This integer specifies the number of threads across which the reconstruction
-algorithm should be parallelized.<br>
+</dd><dt>[<b>--threads</b> &lt;<i>number of processing threads</i>&gt;]
+</dt><dd> This integer specifies the number of threads across which the algorithm should be parallelized.<br>
 The default value for this parameter is equal to the numer of (virtual) processors on the executing  machine.
 
-<DT>[<b>--verbose</b>]
-<DD> Enabling this flag provides a more verbose description of the running times and memory usages of
+</dd><dt>[<b>--maxMemory</b> &lt;<i>maximum memory usage (in GB)</i>&gt;]
+</dt><dd> If positive, this integer value specifies the peak memory utilization for running the reconstruction code (forcing the execution to terminate if the limit is exceeded).
+
+</dd><dt>[<b>--performance</b>]
+</dt><dd> Enabling this flag provides running time and peak memory usage at the end of the execution.
+
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of
 individual components of the surface reconstructor.
-</DIV>
-</DL>
-</UL>
 
+</dd>
+</DETAILS>
+</dl>
+</ul>
 
-<UL>
-<DL>
-<FONT SIZE="+1"><B><A HREF="" ONCLICK="toggleAll( $('surface_trimmer') ) ; return false;">SurfaceTrimmer</A></B></FONT>
-<DIV ID="surface_trimmer">
-<DT><b>--in</b> &#60;<i>input triangle mesh</i>&#62;
-<DD> This string is the name of the file from which the triangle mesh will be read. 
-The file is read in <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format and it is assumed that the vertices have a <I>value</I> field which stores the signal's value. (When run with <B>--density</B> flag, the reconstructor will output this field with the mesh vertices.)
-
-<DT><b>--trim</b> &#60;<i>trimming value</i>&#62;
-<DD> This floating point values specifies the value for mesh trimming. The subset of the mesh with signal value less than the trim value is discarded.
-
-<DT>[<b>--out</b> &#60;<i>output triangle mesh</i>&#62;]
-<DD> This string is the name of the file to which the triangle mesh will be written. 
-The file is written in <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format.
-
-<DT>[<b>--smooth</b> &#60;<i>smoothing iterations</i>&#62;]
-<DD> This integer values the number of umbrella smoothing operations to perform on the signal before trimming.<BR>
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>SurfaceTrimmer</b></font>:
+Trims off parts of a triangle mesh with a per-vertex signal whose value falls below a threshold (used for removing parts of a reconstructed surface that are generated in low-sampling-density regions)
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input triangle mesh</i>&gt;
+</dt><dd> This string is the name of the file from which the triangle mesh will be read. 
+The file is read in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format and it is assumed that the vertices have a <i>value</i> field which stores the signal's value. (When run with <b>--density</b> flag, the reconstructor will output this field with the mesh vertices.)
+
+</dd><dt><b>--trim</b> &lt;<i>trimming value</i>&gt;
+</dt><dd> This floating point value specifies the value for mesh trimming. The subset of the mesh with signal value less than the trim value is discarded.
+
+</dd><dt>[<b>--out</b> &lt;<i>output triangle mesh</i>&gt;]
+</dt><dd> This string is the name of the file to which the triangle mesh will be written. 
+The file is written in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format.
+
+</dd><dt>[<b>--smooth</b> &lt;<i>smoothing iterations</i>&gt;]
+</dt><dd> This integer value specifies the number of umbrella smoothing operations to perform on the signal before trimming.<br>
 The default value is 5.
 
-<DT>[<b>--aRatio</b> &#60;<i>island area ratio</i>&#62;]
-<DD> This floating point value specifies the area ratio that defines a disconnected component as an "island". Connected components whose area, relative to the total area of the mesh, are smaller than this value will be merged into the output surface to close small holes, and will be discarded from the output surface to remove small disconnected components.<BR>
+</dd><dt>[<b>--aRatio</b> &lt;<i>island area ratio</i>&gt;]
+</dt><dd> This floating point value specifies the area ratio that defines a disconnected component as an "island". Connected components whose area, relative to the total area of the mesh, are smaller than this value will be merged into the output surface to close small holes, and will be discarded from the output surface to remove small disconnected components.<br>
 The default value 0.001.
 
-<DT>[<b>--polygonMesh</b>]
-<DD> Enabling this flag tells the trimmer to output a polygon mesh (rather than triangulating the trimming results).
-</DIV>
-</DL>
-</UL>
+</dd><dt>[<b>--polygonMesh</b>]
+</dt><dd> Enabling this flag tells the trimmer to output a polygon mesh (rather than triangulating the trimming results).
+
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of individual components of the surface trimmer.
+
+</dd>
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>ImageStitching</b></font>:
+Stitches together a composite of image tiles into a seamless panorama by solving for the correction term (solving a 2D Laplacian system) <a href="http://www.agarwala.org/efficient_gdc/">[Agarwala, 2007]</A>
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input composite image</i>&gt; &lt;<i>input label image</i>&gt;
+</dt><dd> This pair of strings gives the names of the composite image file and the associated label file.<BR>
+All pixels in the composite that come from the same source should be assigned the same color in the label file.<BR>
+PNG and JPG files are supported (though only PNG should be used for the label file as it is lossless).
+
+</dd><dt>[<b>--out</b> &lt;<i>output image</i>&gt;]
+</dt><dd> This string is the name of the file to which the stitched image will be written.<BR>
+PNG and JPG files are supported.
+
+</dd><dt>[<b>--degree</b> &lt;<i>B-spline degree</i>&gt;]
+</dt><dd> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
+Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<br>
+The default value for this parameter is 1.
+
+</dd><dt>[<b>--wScl</b> &lt;<i>successive under-relaxation scale</i>&gt;]
+</dt><dd> This floating point value specifies the scale for the adapted successive under-relaxation used to remove ringing.<br>
+The default value is 0.125.
+
+</dd><dt>[<b>--wExp</b> &lt;<i>successive under-relaxation exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent for the adapted successive under-relaxation used to remove ringing.<br>
+The default value is 6.
+
+</dd><dt>[<b>--iters</b> &lt;<i>GS iters</i>&gt;]
+</dt><dd> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the hierarchy.<br>
+The default value for this parameter is 8.
+
+</dd><dt>[<b>--threads</b> &lt;<i>number of processing threads</i>&gt;]
+</dt><dd> This integer specifies the number of threads across which the algorithm should be parallelized.<br>
+The default value for this parameter is equal to the number of (virtual) processors on the executing machine.
+
+</dd><dt>[<b>--maxMemory</b> &lt;<i>maximum memory usage (in GB)</i>&gt;]
+</dt><dd> If positive, this integer value specifies the peak memory utilization for running the code (forcing the execution to terminate if the limit is exceeded).
+
+</dd><dt>[<b>--performance</b>]
+</dt><dd> Enabling this flag provides running time and peak memory usage at the end of the execution.
+
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of
+individual components of the image stitcher.
+
+</dd>
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>EDTInHeat</b></font>:
+Computes the unsigned Euclidean Distance Transform of a triangle mesh (solving two 3D Laplacian systems) <A HREF="https://www.cs.cmu.edu/~kmcrane/Projects/HeatMethod/">[Crane, Weischedel, and Wardetzky, 2013]</A>
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input mesh</i>&gt;
+</dt><dd> This string is the name of the file from which the triangle mesh will be read. 
+The file is assumed to be in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format.
+
+</dd><dt>[<b>--out</b> &lt;<i>output octree and coefficients</i>&gt;]
+</dt><dd> This string is the name of the file to which the octree and solution coefficients are to be written.
+
+</dd><dt>[<b>--degree</b> &lt;<i>B-spline degree</i>&gt;]
+</dt><dd> This integer specifies the degree of the B-spline that is to be used to define the finite elements system.
+Larger degrees support higher order approximations, but come at the cost of denser system matrices (incurring a cost in both space and time).<br>
+The default value for this parameter is 1.
+
+</dd><dt>[<b>--depth</b> &lt;<i>edt depth</i>&gt;]
+</dt><dd> This integer is the maximum depth of the tree that will be used for computing the Euclidean Distance Transform.
+Running at depth <i>d</i> corresponds to solving on a grid whose resolution is no larger than
+2^<i>d</i> x 2^<i>d</i> x ...<br>
+The default value for this parameter is 8.
+
+</dd><dt>[<b>--scale</b> &lt;<i>scale factor</i>&gt;]
+</dt><dd> This floating point value specifies the ratio between the diameter of the cube used for computing the EDT
+and the diameter of the mesh's bounding cube.<br>
+The default value is 2.
+
+</dd><dt>[<b>--diffusion</b> &lt;<i>diffusion time</i>&gt;]
+</dt><dd> This floating point value specifies the time-scale for the initial heat diffusion.<BR>
+The default value is 0.0005.
+
+</dd><dt>[<b>--valueWeight</b> &lt;<i>zero-crossing interpolation weight</i>&gt;]
+</dt><dd> This floating point value specifies the importance given to the constraint that the EDT evaluate to zero at points on the input mesh.<br>
+The default value for this parameter is 0.01.
+
+</dd><dt>[<b>--wScl</b> &lt;<i>successive under-relaxation scale</i>&gt;]
+</dt><dd> This floating point value specifies the scale for the adapted successive under-relaxation used to remove ringing.<br>
+The default value is 0.125.
+
+</dd><dt>[<b>--wExp</b> &lt;<i>successive under-relaxation exponent</i>&gt;]
+</dt><dd> This floating point value specifies the exponent for the adapted successive under-relaxation used to remove ringing.<br>
+The default value is 6.
+
+</dd><dt>[<b>--iters</b> &lt;<i>GS iters</i>&gt;]
+</dt><dd> This integer value specifies the number of Gauss-Seidel relaxations to be performed at each level of the hierarchy.<br>
+The default value for this parameter is 8.
+
+</dd><dt>[<b>--threads</b> &lt;<i>number of processing threads</i>&gt;]
+</dt><dd> This integer specifies the number of threads across which the algorithm should be parallelized.<br>
+The default value for this parameter is equal to the number of (virtual) processors on the executing machine.
+
+</dd><dt>[<b>--maxMemory</b> &lt;<i>maximum memory usage (in GB)</i>&gt;]
+</dt><dd> If positive, this integer value specifies the peak memory utilization for running the code (forcing the execution to terminate if the limit is exceeded).
+
+</dd><dt>[<b>--performance</b>]
+</dt><dd> Enabling this flag provides running time and peak memory usage at the end of the execution.
+
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of individual components of the EDT computation.
+
+</dd>
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>AdaptiveTreeVisualization</b></font>:
+Extracts iso-surfaces and a sampling on a regular grid from an implicit function represented over an adapted tree
+</SUMMARY>
+<dt><b>--in</b> &lt;<i>input tree and coefficients</i>&gt;
+</dt><dd> This string is the name of the file from which the tree and implicit function coefficients are to be read. 
+
+</dd><dt>[<b>--grid</b> &lt;<i>output value grid</i>&gt;]
+</dt><dd> This string is the name of the file to which the sampling of the implicit function along a regular grid will be written.<BR>
+The file is written out in binary, with the first 4 bytes corresponding to the (integer) sampling resolution, <i>R</i>,
+and the next 4 x <I>R</I>^<i>D</i> bytes corresponding to the (single precision) floating point values of the implicit function. (Here, <i>D</I> is the dimension.)
+
+</dd><dt>[<b>--primalGrid</b>]
+</dt><dd> Enabling this flag when outputting a grid file samples the implicit function at the corners of the grid, rather than the centers of the cells.
+
 
-<HR>
-<A NAME="USAGE"><B>USAGE</B></A><br>
+</dd><dt>[<b>--mesh</b> &lt;<i>output triangle mesh</i>&gt;]
+</dt><dd> This string is the name of the file to which the triangle mesh will be written. 
+The file is written in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format.<BR>
+This is only supported for dimension 3.
+
+</dd><dt>[<b>--iso</b> &lt;<i>iso-value for mesh extraction</i>&gt;]
+</dt><dd> This floating point value specifies the iso-value at which the implicit surface is to be extracted.<br>
+The default value is 0.
+
+</dd><dt>[<b>--nonLinearFit</b>]
+</dt><dd> Enabling this flag has the reconstructor use quadratic interpolation to estimate the positions of iso-vertices.
+
+</dd><dt>[<b>--polygonMesh</b>]
+</dt><dd> Enabling this flag tells the reconstructor to output a polygon mesh (rather than triangulating the results of Marching Cubes).
+
+</dd><dt>[<b>--flip</b>]
+</dt><dd> Enabling this flag flips the orientations of the output triangles.
+
+</dd><dt>[<b>--threads</b> &lt;<i>number of processing threads</i>&gt;]
+</dt><dd> This integer specifies the number of threads across which the algorithm should be parallelized.<br>
+The default value for this parameter is equal to the number of (virtual) processors on the executing machine.
+
+</dd><dt>[<b>--verbose</b>]
+</dt><dd> Enabling this flag provides a more verbose description of the running times and memory usages of
+individual components of the visualizer.
+
+</dd>
+</DETAILS>
+</dl>
+</ul>
+
+<hr>
+<a name="USAGE"><b>USAGE EXAMPLES (WITH SAMPLE DATA)</b></a><br>
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>PoissonRecon / SSDRecon / SurfaceTrimmer</b></font>
+</SUMMARY>
 For testing purposes, three point sets are provided:
-<OL>
+<ol>
 
-<LI> <A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.points.ply"><B>Eagle</B></A>:
-A set of 796,825 oriented point samples with color (represented in PLY format) was obtained in the EPFL <A HREF="http://lgg.epfl.ch/statues.php">Scanning 3D Statues from Photos</A> course.<br>
-<UL>
-<LI>The original Poisson Reconstruction algorithm can be invoked by calling:
-<BLOCKQUOTE><CODE>% PoissonRecon --in eagle.points.ply --out eagle.unscreened.ply --depth 10 --pointWeight 0</CODE></BLOCKQUOTE>
-using the <b>--pointWeight 0</b> argument to disable the screening.<br>
-
-<LI>By default, screening is enabled so the call:
-<BLOCKQUOTE><CODE>% PoissonRecon --in eagle.points.ply --out eagle.screened.ply --depth 10</CODE></BLOCKQUOTE>
-produces a reconstruction that more faithfully fits the input point positions.<BR>
-
-<LI> A reconstruction of the eagle that extrapolates the color values from the input samples can be obtained by calling:
-<BLOCKQUOTE><CODE>% PoissonRecon --in eagle.points.ply --out eagle.screened.color.ply --depth 10 --color 16</CODE></BLOCKQUOTE>
-using the <b>--color 16</b> to indicate both that color should be used, and the extent to which finer color estimates should be preferenced over coarser estimates.
-
-<LI> Finally, a reconstruction the eagle that does not close up the holes can be obtained by first calling:
-<BLOCKQUOTE><CODE>% PoissonRecon --in eagle.points.ply --out eagle.screened.color.ply --depth 10 --color 16 --density</CODE></BLOCKQUOTE>
-using the <B>--density</B> flag to indicate that density estimates should be output with the vertices of the mesh, and then calling:
-<BLOCKQUOTE><CODE>% SurfaceTrimmer --in eagle.screened.color.ply --out eagle.screened.color.trimmed.ply --trim 7</CODE></BLOCKQUOTE>
-to remove all subsets of the surface where the sampling density corresponds to a depth smaller than 7.
-</UL>
-<TABLE BORDER=1>
-<TR>
-<TD WIDTH="25%"><TABLE><TR><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.unscreened.1.jpg" WIDTH="100%"><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.unscreened.2.jpg" WIDTH="100%"></TR></TABLE>
-<TD WIDTH="25%"><TABLE><TR><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.1.jpg" WIDTH="100%"><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.2.jpg" WIDTH="100%"></TR></TABLE>
-<TD WIDTH="25%"><TABLE><TR><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.1.color.jpg" WIDTH="100%"><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.2.color.jpg" WIDTH="100%"></TR></TABLE>
-<TD WIDTH="25%"><TABLE><TR><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.1.color.trimmed.jpg" WIDTH="100%"><TD WIDTH="50%"><IMG SRC="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.2.color.trimmed.jpg" WIDTH="100%"></TR></TABLE>
-</TR>
-<TR>
-<TH>Unscreened
-<TH>Screened
-<TH>Screened + Color
-<TH>Screened + Color + Trimmed
-</TR>
-</TABLE>
-
-<LI> <A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/bunny.points.ply"><B>Bunny</B></A>:
+<li> <a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/horse.npts"><b>Horse</b></a>:
+A set of 100,000 oriented point samples (represented in ASCII format) was obtained by sampling a virtual horse model with a sampling density proportional to curvature, giving a set of non-uniformly distributed points.<br>
+The surface of the model can be reconstructed by calling either the Poisson surface reconstructor:
+<blockquote><code>% PoissonRecon --in horse.npts --out horse.ply --depth 10</code></blockquote>
+or the SSD surface reconstructor
+<blockquote><code>% SSDRecon --in horse.npts --out horse.ply --depth 10</code></blockquote>
+</li>
+
+<li> <a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/bunny.points.ply"><b>Bunny</b></a>:
 A set of 362,271 oriented point samples (represented in PLY format) was obtained by merging the data from the original Stanford Bunny
-<A HREF="ftp://graphics.stanford.edu/pub/3Dscanrep/bunny.tar.gz">range scans</A>. The orientation of the sample points was estimated
+<a href="ftp://graphics.stanford.edu/pub/3Dscanrep/bunny.tar.gz">range scans</a>. The orientation of the sample points was estimated
 using the connectivity information within individual range scans.<br>
-The surface of the model can be reconstructed by calling the surface reconstructor as follows:
-<BLOCKQUOTE><CODE>% PoissonRecon --in bunny.points.ply --out bunny.ply --depth 10</CODE></BLOCKQUOTE>
-
-<LI> <A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/horse.npts"><B>Horse</B></A>:
-A set of 100,000 oriented point samples (represented in ASCII format) was obtained by sampling a virtual horse model with a sampling density proportional to curvature, giving a set of non-uniformly distributed points.<br>
-The surface of the model can be reconstructed by calling the surface reconstructor as follows:
-<BLOCKQUOTE><CODE>% PoissonRecon --in horse.npts --out horse.ply --depth 10</CODE></BLOCKQUOTE>
-
-</OL>
-
-To convert the binary <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format to
-<A HREF="http://research.microsoft.com/~hoppe/">Hugues Hoppe's</A> ASCII
-mesh format, a <A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/ply2mesh">Perl script</A> is provided.<br>
-As an examples, the reconstructed bunny can be converted into the ASCII mesh format as follows:
-<BLOCKQUOTE><CODE>% ply2mesh.pl bunny.ply &gt; bunny.m</CODE></BLOCKQUOTE>
-
-<HR>
-<A NAME="CHANGES"><B>CHANGES</B></A><br>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version3/">Version 3</A>:
-<OL>
-<LI> The implementation of the <b>--samplesPerNode</b> parameter has been modified so that a value of "1" more closely corresponds to a distribution with one sample per leaf node.
-<LI> The code has been modified to support compilation under MSVC 2010 and the associated solution and project files are now provided. (Due to a bug in the Visual Studios compiler, this required modifying the implementation of some of the bit-shifting operators.)
-</OL>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4/">Version 4</A>:
-<OL>
-<LI> The code supports screened reconstruction, with interpolation weight specified through the <b>--pointWeight</b> parameter.
-<LI> The code has been implemented to support parallel processing, with the number of threads used for parallelization specified by the <b>--threads</b> parameter.
-<LI> The input point set can now also be in <A HREF="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</A> format, and the file-type is determined by the extension, so that the <b>--binary</b> flag is now obsolete.
-<LI> At depths coarser than the one specified by the value <b>--minDepth</b> the octree is no longer adaptive but rather complete, simplifying the prolongation operator.
-</OL>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">Version 4.5</A>:
-<OL>
-<LI> The algorithmic complexity of the solver was reduced from log-linear to linear.
-</OL>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">Version 4.51</A>:
-<OL>
-<LI> Smart pointers were added to ensure that memory accesses were in bounds.
-</OL>
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5/">Version 5</A>:
-<OL>
-<LI> The <B>--density</B> flag was added to the reconstructor to output the estimated depth of the iso-vertices.
-<LI> The <I>SurfaceTrimmer</I> executable was added to support trimming off the subset of the reconstructed surface that are far away from the input samples, thereby allowing for the generation of non-water-tight surface.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.1/">Version 5.1</A>:
-<OL>
-<LI> Minor bug-fix to address incorrect neighborhood estimation in the octree finalization.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.5a/">Version 5.5a</A>:
-<OL>
-<LI> Modified to support depths greater than 14. (Should work up to 18 or 19 now.)
-<LI> Improved speed and memory performance by removing the construction of integral and value tables.
-<LI> Fixed a bug in Version 5.5 that used memory and took more time without doing anything useful.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.6/">Version 5.6</A>:
-<OL>
-<LI> Added the <b>--normalWeight</b> flag to support setting a point's interpolation weight in proportion to the magnitude of its normal.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.7/">Version 5.7</A>:
-<OL>
-<LI> Modified the setting of the constraints, replacing the map/reduce implementation with OpenMP atomics to reduce memory usage.
-<LI> Fixed bugs that caused numerical overflow when processing large point clouds on multi-core machines.
-<LI> Improved efficiency of the iso-surface extraction phse.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.71/">Version 5.71</A>:
-<OL>
-<LI> Added the function <I>GetSolutionValue</I> to support the evaluation of the implicit function at a specific point.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6/">Version 6</A>:
-<OL>
-<LI> Modified the solver to use Gauss-Seidel relaxation instead of conjugate-gradients at finer resolution.
-<LI> Re-ordered the implementation of the solver so that only a windowed subset of the matrix is in memory at any time, thereby reducing the memory usage during the solver phase.
-<LI> Separated the storage of the data associated with the octree nodes from the topology.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.1/">Version 6.1</A>:
-<OL>
-<LI> Re-ordered the implementation of the iso-surface extraction so that only a windowed subset of the octree is in memory at any time, thereby reducing the memory usage during the extracted phase.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.11/">Version 6.11</A>:
-<OL>
-<LI> Fixed a bug that created a crash in the evaluation phase when <b>--pointWeight</b> is set zero.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.12/">Version 6.12</A>:
-<OL>
-<LI> Removed the OpenMP <I>firstprivate</I> directive as it seemed to cause trouble under Linux compilations.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13/">Version 6.13</A>:
-<OL>
-<LI> Added a <B>MemoryPointStream</B> class in <I>PointStream.inl</I> to support in-memory point clouds.
-<LI> Modified the signature of <U>Octree::SetTree</U> in <I>MultiGridOctreeData.h</I> to take in a pointer to an object of type <B>PointStream</B> rather than a file-name.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13a/">Version 6.13a</A>:
-<OL>
-<LI> Modified the signature of <U>Octree::SetIsoSurface</U> to rerun a <I>void</I>. [<A HREF="http://www.danielgm.net/cc/">cloudcompare</A>]
-<LI> Added a definition of <U>SetIsoVertexValue</U> supporting double precision vertices. [<A HREF="http://www.danielgm.net/cc/">cloudcompare</A>]
-<LI> Removed <I>Time.[h/cpp]</I> from the repository. [<A HREF="http://www.danielgm.net/cc/">cloudcompare</A>/<A HREF="http://asmaloney.com/">asmaloney</A>]
-<LI> Fixed assignment bug in <U>Octree::SetSliceIsoVertices</U>. [<A HREF="http://asmaloney.com/">asmaloney</A>]
-<LI> Fixed initialization bug in <U>SortedTreeNodes::SliceTableData</U> and <U>SortedTreeNodes::XSliceTableData</U>. [<A HREF="http://asmaloney.com/">asmaloney</A>]
-<LI> Included <I>stdlib.h</I> in <I>Geometry.h</I>. [<A HREF="http://asmaloney.com/">asmaloney</A>]
-<LI> Fixed default value bug in declaration of <U>Octree::SetTree</U>. [<A HREF="http://asmaloney.com/">asmaloney</A>]
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version7.0/">Version 7.0</A>:
-<OL>
-<LI> Added functionality to support color extrapolation if present in the input.
-<LI> Modified a bug with the way in which sample contributions were scaled.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version8.0/">Version 8.0</A>:
-<OL>
-<LI> Added support for different degree B-splines.
+The original (unscreened) Poisson reconstruction can be obtained by setting the point interpolation weight to zero:
+<blockquote><code>% PoissonRecon --in bunny.points.ply --out bunny.ply --depth 10 --pointWeight 0</code></blockquote>
+By default, the Poisson surface reconstructor uses degree-2 B-splines. A more efficient reconstruction can be obtained using degree-1 B-splines:
+<blockquote><code>% PoissonRecon --in bunny.points.ply --out bunny.ply --depth 10 --pointWeight 0 --degree 1</code></blockquote>
+(The SSD reconstructor requires B-splines of degree at least 2 since second derivatives are required to formulate the bi-Laplacian energy.)
+</li>
+
+<li> <a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/eagle.points.ply"><b>Eagle</b></a>:
+A set of 796,825 oriented point samples with color (represented in PLY format) was obtained in the EPFL <a href="http://lgg.epfl.ch/statues.php">Scanning 3D Statues from Photos</a> course.<br>
+A reconstruction of the eagle that extrapolates the color values from the input samples can be obtained by calling:
+<blockquote><code>% PoissonRecon --in eagle.points.ply --out eagle.screened.color.ply --depth 10 --colors</code></blockquote>
+using the <b>--colors</b> flag to indicate that color extrapolation should be used.<BR>
+A reconstruction of the eagle that does not close up the holes can be obtained by first calling:
+<blockquote><code>% SSDRecon --in eagle.points.ply --out eagle.screened.color.ply --depth 10 --colors --density</code></blockquote>
+using the <b>--density</b> flag to indicate that density estimates should be output with the vertices of the mesh, and then calling:
+<blockquote><code>% SurfaceTrimmer --in eagle.screened.color.ply --out eagle.screened.color.trimmed.ply --trim 7</code></blockquote>
+to remove all subsets of the surface where the sampling density corresponds to a depth smaller than 7.
+</li>
+
+</ol>
+
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>ImageStitching</b></font>
+</SUMMARY>
+For testing purposes, two panoramas are provided: <a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Jaffa.zip"><b>Jaffa</b></a> (23794 x 9492 pixels) and <a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/OldRag.zip"><b>OldRag</b></a> (87722 x 12501 pixels).
+
+A seamless panorama can be obtained by running:
+<blockquote><code>% ImageStitching --in pixels.png labels.png --out out.png</code></blockquote>
+
+</DETAILS>
+</dl>
+</ul>
+
+
+<ul>
+<dl>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>EDTInHeat / AdaptiveTreeVisualization</b></font>
+</SUMMARY>
+The Euclidean Distance Transform of the reconstructed horse can be obtained by running:
+<blockquote><code>% EDTInHeat --in horse.ply --out horse.edt --depth 9</code></blockquote>
+Then, the visualization code can be used to extract iso-surfaces from the implicit function.<BR>
+To obtain a visualization near the input surface, use an iso-value close to zero:
+<blockquote><code>% AdaptiveTreeVisualization.exe --in horse.edt --mesh horse_0.01_.ply --iso 0.01 --flip</code></blockquote>
+(By default, the surface is aligned so that the outward facing normal aligns with the negative gradient. Hence, the <CODE>--flip</CODE> flag is used to re-orient the surface.)<BR>
+To obtain a visualization closer to the boundary of the bounding box, use a larger iso-value:
+<blockquote><code>% AdaptiveTreeVisualization.exe --in horse.edt --mesh horse_0.25_.ply --iso 0.25 --flip</code></blockquote>
+(Since the default <CODE>--scale</CODE> is 2, a value of 0.25 should still give a surface that is contained within the bounding box.)<BR>
+To obtain a sampling of the implicit function over a regular grid:
+<blockquote><code>% AdaptiveTreeVisualization.exe --in horse.edt --grid horse.grid</code></blockquote>
+
+</DETAILS>
+</dl>
+</ul>
+
+
+<hr>
+<DETAILS>
+<SUMMARY>
+<font size="+1"><b>HISTORY OF CHANGES</b></font>
+</SUMMARY>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version3/">Version 3</a>:
+<ol>
+<li> The implementation of the <b>--samplesPerNode</b> parameter has been modified so that a value of "1" more closely corresponds to a distribution with one sample per leaf node.
+</li><li> The code has been modified to support compilation under MSVC 2010 and the associated solution and project files are now provided. (Due to a bug in the Visual Studio compiler, this required modifying the implementation of some of the bit-shifting operators.)
+</li></ol>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4/">Version 4</a>:
+<ol>
+<li> The code supports screened reconstruction, with interpolation weight specified through the <b>--pointWeight</b> parameter.
+</li><li> The code has been implemented to support parallel processing, with the number of threads used for parallelization specified by the <b>--threads</b> parameter.
+</li><li> The input point set can now also be in <a href="http://www.cc.gatech.edu/projects/large_models/ply.html">PLY</a> format, and the file-type is determined by the extension, so that the <b>--binary</b> flag is now obsolete.
+</li><li> At depths coarser than the one specified by the value <b>--minDepth</b> the octree is no longer adaptive but rather complete, simplifying the prolongation operator.
+</li></ol>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">Version 4.5</a>:
+<ol>
+<li> The algorithmic complexity of the solver was reduced from log-linear to linear.
+</li></ol>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version4.5/">Version 4.51</a>:
+<ol>
+<li> Smart pointers were added to ensure that memory accesses were in bounds.
+</li></ol>
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5/">Version 5</a>:
+<ol>
+<li> The <b>--density</b> flag was added to the reconstructor to output the estimated depth of the iso-vertices.
+</li><li> The <i>SurfaceTrimmer</i> executable was added to support trimming off the subsets of the reconstructed surface that are far away from the input samples, thereby allowing for the generation of non-water-tight surfaces.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.1/">Version 5.1</a>:
+<ol>
+<li> Minor bug-fix to address incorrect neighborhood estimation in the octree finalization.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.5a/">Version 5.5a</a>:
+<ol>
+<li> Modified to support depths greater than 14. (Should work up to 18 or 19 now.)
+</li><li> Improved speed and memory performance by removing the construction of integral and value tables.
+</li><li> Fixed a bug in Version 5.5 that used memory and took more time without doing anything useful.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.6/">Version 5.6</a>:
+<ol>
+<li> Added the <b>--normalWeight</b> flag to support setting a point's interpolation weight in proportion to the magnitude of its normal.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.7/">Version 5.7</a>:
+<ol>
+<li> Modified the setting of the constraints, replacing the map/reduce implementation with OpenMP atomics to reduce memory usage.
+</li><li> Fixed bugs that caused numerical overflow when processing large point clouds on multi-core machines.
+</li><li> Improved efficiency of the iso-surface extraction phase.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version5.71/">Version 5.71</a>:
+<ol>
+<li> Added the function <i>GetSolutionValue</i> to support the evaluation of the implicit function at a specific point.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6/">Version 6</a>:
+<ol>
+<li> Modified the solver to use Gauss-Seidel relaxation instead of conjugate-gradients at finer resolution.
+</li><li> Re-ordered the implementation of the solver so that only a windowed subset of the matrix is in memory at any time, thereby reducing the memory usage during the solver phase.
+</li><li> Separated the storage of the data associated with the octree nodes from the topology.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.1/">Version 6.1</a>:
+<ol>
+<li> Re-ordered the implementation of the iso-surface extraction so that only a windowed subset of the octree is in memory at any time, thereby reducing the memory usage during the extraction phase.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.11/">Version 6.11</a>:
+<ol>
+<li> Fixed a bug that created a crash in the evaluation phase when <b>--pointWeight</b> is set to zero.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.12/">Version 6.12</a>:
+<ol>
+<li> Removed the OpenMP <i>firstprivate</i> directive as it seemed to cause trouble under Linux compilations.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13/">Version 6.13</a>:
+<ol>
+<li> Added a <b>MemoryPointStream</b> class in <i>PointStream.inl</i> to support in-memory point clouds.
+</li><li> Modified the signature of <u>Octree::SetTree</u> in <i>MultiGridOctreeData.h</i> to take in a pointer to an object of type <b>PointStream</b> rather than a file-name.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version6.13a/">Version 6.13a</a>:
+<ol>
+<li> Modified the signature of <u>Octree::SetIsoSurface</u> to return <i>void</i>. [<a href="http://www.danielgm.net/cc/">cloudcompare</a>]
+</li><li> Added a definition of <u>SetIsoVertexValue</u> supporting double precision vertices. [<a href="http://www.danielgm.net/cc/">cloudcompare</a>]
+</li><li> Removed <i>Time.[h/cpp]</i> from the repository. [<a href="http://www.danielgm.net/cc/">cloudcompare</a>/<a href="http://asmaloney.com/">asmaloney</a>]
+</li><li> Fixed assignment bug in <u>Octree::SetSliceIsoVertices</u>. [<a href="http://asmaloney.com/">asmaloney</a>]
+</li><li> Fixed initialization bug in <u>SortedTreeNodes::SliceTableData</u> and <u>SortedTreeNodes::XSliceTableData</u>. [<a href="http://asmaloney.com/">asmaloney</a>]
+</li><li> Included <i>stdlib.h</i> in <i>Geometry.h</i>. [<a href="http://asmaloney.com/">asmaloney</a>]
+</li><li> Fixed default value bug in declaration of <u>Octree::SetTree</u>. [<a href="http://asmaloney.com/">asmaloney</a>]
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version7.0/">Version 7.0</a>:
+<ol>
+<li> Added functionality to support color extrapolation if present in the input.
+</li><li> Fixed a bug with the way in which sample contributions were scaled.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version8.0/">Version 8.0</a>:
+<ol>
+<li> Added support for different degree B-splines.
 (Note that as the B-spline degree is a template parameter, only degree 1 through 4 are supported.
-If higher order degrees are desired, additional template parameters can be easily added in the body of the <U>Execute</U> function inside of <I>PoissonRecon.cpp</I>.
+If higher order degrees are desired, additional template parameters can be easily added in the body of the <u>Execute</u> function inside of <i>PoissonRecon.cpp</i>.
 Similarly, to reduce compilation times, support for specific degrees can be removed.)
-<LI> Added the <B>--primalVoxel</B> flag to support to extraction of a voxel grid using primal sampling.
-<LI> Changed the implementation of the voxel sampling so that computation is now linear, rather than log-linear, in the number of samples.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.0/">Version 9.0</A>:
-<OL>
-<LI> Added support for free boundary conditions.
-<LI> Extended the solver to support more general linear systems. This makes it possible to use the same framework to implement the <A HREF="http://mesh.brown.edu/ssd/">Smoothed Signed Distance Reconstruction</A> of Calakli and Taubin (2011).
-<LI> Modified the implementation of density estimation and input representation. This tends to define a slightly larger system. On its own, this results in slightly increased running-time/footprint for full-res reconstructions, but provides a substantially faster implementation when the output complexity is smaller than the input.
-</OL>
-
-<A HREF="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/">Version 9.01</A>:
-<OL>
-<LI> Reverted the density estimation to behave as in Version 8.0.
-</OL>
-
-<HR>
-<A NAME="SUPPORT"><B>SUPPORT</B></A><br>
+</li><li> Added the <b>--primalGrid</b> flag to support extraction of a grid using primal sampling.
+</li><li> Changed the implementation of the grid sampling so that computation is now linear, rather than log-linear, in the number of samples.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.0/">Version 9.0</a>:
+<ol>
+<li> Added support for free boundary conditions.
+</li><li> Extended the solver to support more general linear systems. This makes it possible to use the same framework to implement the <a href="http://mesh.brown.edu/ssd/">Smoothed Signed Distance Reconstruction</a> of Calakli and Taubin (2011).
+</li><li> Modified the implementation of density estimation and input representation. This tends to define a slightly larger system. On its own, this results in slightly increased running-time/footprint for full-res reconstructions, but provides a substantially faster implementation when the output complexity is smaller than the input.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/">Version 9.01</a>:
+<ol>
+<li> Reverted the density estimation to behave as in Version 8.0.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version9.01/">Version 9.011</a>:
+<ol>
+<li> Added a parameter for specifying the temporary directory.
+</li></ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.00/">Version 10.00</a>:
+<ol>
+<li> The code has been reworked to support arbitrary dimensions, finite elements of arbitrary degree, general SPD systems in the evaluated/integrated values and derivatives of the functions, etc.</LI>
+<LI> For the reconstruction code, added the <B>--width</B> flag which allows the system to compute the depth of the octree given a target width for the finest resolution nodes.</LI>
+<LI> For the reconstruction code, fixed a bug in the handling of the confidence encoded in the lengths of the normals. In addition, added the flags <B>--confidence</B> and <B>--confidenceBias</B> which allow the user more control of how confidence is used to affect the contribution of a sample.</LI>
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.01/">Version 10.01</a>:
+<ol>
+<li> Modified the reconstruction code to facilitate interpolation of other input-sample quantities, in addition to color.</LI>
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.02/">Version 10.02</a>:
+<ol>
+<li> Set the default value for <b>--degree</B> in PoissonRecon to 1 and changed the definition of <I>DATA_DEGREE</I> to 0 for sharper color interpolation.</LI>
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.03/">Version 10.03</a>:
+<ol>
+<li> Cleaned up memory leaks and fixed a bug causing ImageStitching and EDTInHeat to SEGFAULT on Linux.
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.04/">Version 10.04</a>:
+<ol>
+<li> Replaced the ply I/O code with an object-oriented implementation.
+<LI> Updated the code to support compilation under gcc version 4.8.
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.05/">Version 10.05</a>:
+<ol>
+<LI> Added cleaner support for warning and error handling.
+<LI> Minor bug fixes.
+<LI> Added a <B>--inCore</B> flag that enables keeping the pointset in memory instead of streaming it in from disk.
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.06/">Version 10.06</a>:
+<ol>
+<LI> Improved performance.
+<LI> Modified <CODE>PoissonRecon</CODE> and <CODE>SSDRecon</CODE> to support processing of 2D point sets.
+<LI> Modified the 2D implementations of <CODE>PoissonRecon</CODE>, <CODE>SSDRecon</CODE>, and <CODE>AdaptiveTreeVisualization</CODE> to support output to <CODE>.jpg</CODE> and <CODE>.png</CODE> image files.
+</ol>
+
+<a href="http://www.cs.jhu.edu/~misha/Code/PoissonRecon/Version10.07/">Version 10.07</a>:
+<ol>
+<LI> Removed a bug that would cause memory access errors when some slices were empty.
+</ol>
+
+</DETAILS>
+
+
+<hr>
+<a name="SUPPORT"><b>SUPPORT</b></a><br>
 This work genersouly supported by NSF grants #0746039 and #1422325.
 
-<HR>
-<A HREF="http://www.cs.jhu.edu/~misha">HOME</A>
+<hr>
+<a href="http://www.cs.jhu.edu/~misha">HOME</a>
+
diff --git a/SSDRecon.vcxproj b/SSDRecon.vcxproj
index 5ddf889..1a5bd3e 100644
--- a/SSDRecon.vcxproj
+++ b/SSDRecon.vcxproj
@@ -1,234 +1,161 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="Src\PlyFile.cpp" />
+    <ClCompile Include="Src\SSDRecon.cpp" />
+  </ItemGroup>
   <PropertyGroup Label="Globals">
-    <ProjectName>SSDRecon</ProjectName>
-    <ProjectGuid>{7838CA1E-8A39-4A2B-AC3D-3E25FEAEA2D6}</ProjectGuid>
-    <RootNamespace>PoissonRecon</RootNamespace>
+    <ProjectGuid>{477765F8-C16A-406B-807F-1302DAE74EBA}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
-    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <RootNamespace>SSDRecon</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+    <ProjectName>SSDRecon</ProjectName>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)\Obj\$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)Bin\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
-    <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.exe</TargetExt>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)\Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
-      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
     <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <TargetMachine>MachineX86</TargetMachine>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
     <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
-      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <OpenMPSupport>true</OpenMPSupport>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
     <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <TargetMachine>MachineX64</TargetMachine>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <OpenMPSupport>true</OpenMPSupport>
-      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
     <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
-      <LargeAddressAware>true</LargeAddressAware>
-      <OptimizeReferences>true</OptimizeReferences>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <TargetMachine>MachineX86</TargetMachine>
-      <AdditionalDependencies>psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
     <ClCompile>
-      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <FloatingPointModel>Precise</FloatingPointModel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <OpenMPSupport>true</OpenMPSupport>
-      <IntrinsicFunctions>false</IntrinsicFunctions>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <AdditionalOptions>
+      </AdditionalOptions>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
-      <OptimizeReferences>true</OptimizeReferences>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <RandomizedBaseAddress>false</RandomizedBaseAddress>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <TargetMachine>MachineX64</TargetMachine>
-      <AdditionalDependencies>psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <IgnoreSpecificDefaultLibraries>
-      </IgnoreSpecificDefaultLibraries>
-      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>ZLIB.lib;JPEG.lib;PNG.lib;psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp" />
-    <ClCompile Include="Src\Factor.cpp" />
-    <ClCompile Include="Src\Geometry.cpp" />
-    <ClCompile Include="Src\MarchingCubes.cpp" />
-    <ClCompile Include="Src\PlyFile.cpp" />
-    <ClCompile Include="Src\SSDRecon.cpp" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\Allocator.h" />
-    <ClInclude Include="Src\Array.h" />
-    <ClInclude Include="Src\BinaryNode.h" />
-    <ClInclude Include="Src\BSplineData.h" />
-    <ClInclude Include="Src\CmdLineParser.h" />
-    <ClInclude Include="Src\Factor.h" />
-    <ClInclude Include="Src\Geometry.h" />
-    <ClInclude Include="Src\MarchingCubes.h" />
-    <ClInclude Include="Src\MAT.h" />
-    <ClInclude Include="Src\MemoryUsage.h" />
-    <ClInclude Include="Src\MultiGridOctreeData.h" />
-    <ClInclude Include="Src\MyTime.h" />
-    <ClInclude Include="Src\Octree.h" />
-    <ClInclude Include="Src\Ply.h" />
-    <ClInclude Include="Src\PointStream.h" />
-    <ClInclude Include="Src\Polynomial.h" />
-    <ClInclude Include="Src\PPolynomial.h" />
-    <ClInclude Include="Src\SparseMatrix.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\Array.inl" />
-    <None Include="Src\CmdLineParser.inl" />
-    <None Include="Src\MAT.inl" />
-    <None Include="Src\MultiGridOctreeData.Evaluation.inl" />
-    <None Include="Src\MultiGridOctreeData.IsoSurface.inl" />
-    <None Include="Src\MultiGridOctreeData.SortedTreeNodes.inl" />
-    <None Include="Src\MultiGridOctreeData.System.inl" />
-    <None Include="Src\MultiGridOctreeData.WeightedSamples.inl" />
-    <None Include="Src\PointStream.inl" />
-    <None Include="Src\BSplineData.inl" />
-    <None Include="Src\Geometry.inl" />
-    <None Include="Src\MultiGridOctreeData.inl" />
-    <None Include="Src\Octree.inl" />
-    <None Include="Src\Polynomial.inl" />
-    <None Include="Src\PPolynomial.inl" />
-    <None Include="Src\SparseMatrix.inl" />
-  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/SSDRecon.vcxproj.filters b/SSDRecon.vcxproj.filters
deleted file mode 100644
index ce331d6..0000000
--- a/SSDRecon.vcxproj.filters
+++ /dev/null
@@ -1,143 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Include Files">
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
-      <Extensions>inc;inl</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\Factor.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\Geometry.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\MarchingCubes.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\PlyFile.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\SSDRecon.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\Allocator.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\BinaryNode.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\BSplineData.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\CmdLineParser.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Factor.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Geometry.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MarchingCubes.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MemoryUsage.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MultiGridOctreeData.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Octree.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\PointStream.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Polynomial.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\PPolynomial.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\SparseMatrix.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MAT.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Array.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Ply.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MyTime.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\BSplineData.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Geometry.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Octree.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Polynomial.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\PPolynomial.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\SparseMatrix.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\PointStream.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\CmdLineParser.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MAT.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\Array.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.SortedTreeNodes.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.IsoSurface.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.Evaluation.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.WeightedSamples.inl">
-      <Filter>Include Files</Filter>
-    </None>
-    <None Include="Src\MultiGridOctreeData.System.inl">
-      <Filter>Include Files</Filter>
-    </None>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/Src/AdaptiveTreeVisualization.cpp b/Src/AdaptiveTreeVisualization.cpp
new file mode 100644
index 0000000..6720cdc
--- /dev/null
+++ b/Src/AdaptiveTreeVisualization.cpp
@@ -0,0 +1,302 @@
+/*
+Copyright (c) 2016, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#undef ARRAY_DEBUG
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+#include "MyMiscellany.h"
+#include "CmdLineParser.h"
+#include "PPolynomial.h"
+#include "FEMTree.h"
+#include "Ply.h"
+#include "PointStreamData.h"
+#include "Image.h"
+
+cmdLineParameter< char* >	// string-valued options
+	In( "in" ) ,	// input tree file; main() prints the usage and fails when it is not set
+	OutMesh( "mesh" ) ,	// when set, _Execute() extracts an iso-surface and writes it to this file
+	OutGrid( "grid" );	// when set, _Execute() samples a regular grid and writes it to this file
+
+cmdLineReadable	// switches that are only tested through their .set member
+	PolygonMesh( "polygonMesh" ) ,	// forwarded to the iso-surface extractor
+	NonManifold( "nonManifold" ) ,	// forwarded (negated) to the iso-surface extractor
+	FlipOrientation( "flip" ) ,	// forwarded to the iso-surface extractor
+	ASCII( "ascii" ) ,	// write the mesh as PLY_ASCII instead of PLY_BINARY_NATIVE
+	NonLinearFit( "nonLinearFit" ) ,	// forwarded to the iso-surface extractor
+	PrimalGrid( "primalGrid" ) ,	// forwarded to regularGridEvaluate() when sampling the grid
+	Verbose( "verbose" );	// enables the informational printf() output
+
+cmdLineParameter< int >
+	Threads( "threads" , omp_get_num_procs() );	// defaults to the processor count; main() clamps it to at least 1
+
+cmdLineParameter< float >
+	IsoValue( "iso" , 0.f );	// iso-value passed to the surface extraction, 0 by default
+
+cmdLineReadable* params[] =	// NULL-terminated option table handed to cmdLineParse() in main()
+{
+	&In , 
+	&OutMesh , &NonManifold , &PolygonMesh , &FlipOrientation , &ASCII , &NonLinearFit , &IsoValue ,
+	&OutGrid , &PrimalGrid ,
+	&Threads ,
+	&Verbose , 
+	NULL
+};
+
+
+void ShowUsage( char* ex )	// Prints the command-line synopsis to stdout; ex is the executable name shown on the first line.
+{
+	printf( "Usage: %s\n" , ex );
+	printf( "\t --%s <input tree>\n" , In.name );	// the only option not shown in brackets
+	printf( "\t[--%s <output triangle mesh>]\n" , OutMesh.name );
+	printf( "\t[--%s <output grid>]\n" , OutGrid.name );
+#ifdef _OPENMP
+	printf( "\t[--%s <num threads>=%d]\n" , Threads.name , Threads.value );	// only advertised in OpenMP builds
+#endif // _OPENMP
+	printf( "\t[--%s <iso-value for extraction>=%f]\n" , IsoValue.name , IsoValue.value );
+	printf( "\t[--%s]\n" , NonManifold.name );
+	printf( "\t[--%s]\n" , PolygonMesh.name );
+	printf( "\t[--%s]\n" , NonLinearFit.name );
+	printf( "\t[--%s]\n" , FlipOrientation.name );
+	printf( "\t[--%s]\n" , PrimalGrid.name );
+	printf( "\t[--%s]\n" , ASCII.name );
+	printf( "\t[--%s]\n" , Verbose.name );
+}
+
+template< typename Real , unsigned int Dim >	// Real: scalar type of the samples, Dim: grid dimension
+void WriteGrid( ConstPointer( Real ) values , int res , const char *fileName )	// Writes res^Dim samples either as a gray-scale image (2D + image extension) or as a raw float grid.
+{
+	int resolution = 1;	// total sample count, res^Dim
+	for( int d=0 ; d<Dim ; d++ ) resolution *= res;	// NOTE(review): accumulated in an int -- confirm res^Dim cannot overflow for the grids used
+
+	char *ext = GetFileExtension( fileName );	// released with delete[] at the end of the function
+
+	if( Dim==2 && ImageWriter::ValidExtension( ext ) )
+	{
+		Real avg = 0;	// mean of the samples
+#pragma omp parallel for reduction( + : avg )
+		for( int i=0 ; i<resolution ; i++ ) avg += values[i];
+		avg /= (Real)resolution;
+
+		Real std = 0;	// standard deviation of the samples
+#pragma omp parallel for reduction( + : std )
+		for( int i=0 ; i<resolution ; i++ ) std += ( values[i] - avg ) * ( values[i] - avg );
+		std = (Real)sqrt( std / resolution );
+
+		if( Verbose.set ) printf( "Grid to image: [%.2f,%.2f] -> [0,255]\n" , avg - 2*std , avg + 2*std );
+
+		unsigned char *pixels = new unsigned char[ resolution*3 ];	// three channels per sample, all set to the same gray level
+#pragma omp parallel for
+		for( int i=0 ; i<resolution ; i++ )
+		{
+			Real v = (Real)std::min< Real >( (Real)1. , std::max< Real >( (Real)-1. , ( values[i] - avg ) / (2*std ) ) );	// deviation in units of 2*std, clamped to [-1,1]
+			v = (Real)( ( v + 1. ) / 2. * 256. );	// rescale [-1,1] to [0,256]
+			unsigned char color = (unsigned char )std::min< Real >( (Real)255. , std::max< Real >( (Real)0. , v ) );	// clamp to [0,255] before narrowing
+			for( int c=0 ; c<3 ; c++ ) pixels[i*3+c ] = color;
+		}
+		ImageWriter::Write( fileName , pixels , res , res , 3 );
+		delete[] pixels;
+	}
+	else
+	{
+		// Raw output: the int res followed by res^Dim floats.
+		FILE *fp = fopen( fileName , "wb" );
+		if( !fp ) ERROR_OUT( "Failed to open grid file for writing: %s" , fileName );
+		else
+		{
+			fwrite( &res , sizeof(int) , 1 , fp );
+			if( typeid(Real)==typeid(float) ) fwrite( values , sizeof(float) , resolution , fp );	// already float: write the buffer directly
+			else
+			{
+				float *fValues = new float[resolution];	// temporary single-precision copy of the samples
+				for( int i=0 ; i<resolution ; i++ ) fValues[i] = float( values[i] );
+				fwrite( fValues , sizeof(float) , resolution , fp );
+				delete[] fValues;
+			}
+			fclose( fp );
+		}
+	}
+	delete[] ext;
+}
+
+template< unsigned int Dim , class Real , unsigned int FEMSig >	// FEMSig: finite-element signature shared by all Dim axes
+void _Execute( const FEMTree< Dim , Real >* tree , FILE* fp )	// Reads the coefficients from fp, then writes the grid (--grid) and/or the iso-surface (--mesh).
+{
+	static const unsigned int Degree = FEMSignature< FEMSig >::Degree;	// NOTE(review): not referenced elsewhere in this function
+	DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > > coefficients;
+
+	coefficients.read( fp );
+
+	// Output the grid
+	if( OutGrid.set )
+	{
+		int res = 0;	// passed to regularGridEvaluate() and then to WriteGrid() as the grid resolution
+		double t = Time();
+		Pointer( Real ) values = tree->template regularGridEvaluate< true >( coefficients , res , -1 , PrimalGrid.set );
+		if( Verbose.set ) printf( "Got grid: %.2f(s)\n" , Time()-t );
+		WriteGrid< Real , Dim >( values , res , OutGrid.value );
+		DeletePointer( values );
+	}
+
+	// Output the mesh
+	if( OutMesh.set )
+	{
+		double t = Time();
+		typedef PlyVertex< Real , Dim > Vertex;
+		CoredFileMeshData< Vertex > mesh;
+		std::function< void ( Vertex& , Point< Real , Dim > , Real , Real ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real , Real ){ v.point = p; };	// stores only the position; the two Real arguments are ignored
+#if defined( __GNUC__ ) && __GNUC__ < 5	// older gcc gets an explicitly typed null data pointer -- presumably it cannot resolve a bare NULL here
+		#warning "you've got me gcc version<5"
+			static const unsigned int DataSig = FEMDegreeAndBType< 0 , BOUNDARY_FREE >::Signature;
+		IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< Real >( IsotropicUIntPack< Dim , FEMSig >() , UIntPack< 0 >() , UIntPack< FEMTrivialSignature >() , *tree , ( typename FEMTree< Dim , Real >::template DensityEstimator< 0 >* )NULL , ( SparseNodeData< ProjectiveData< Real , Real > , IsotropicUIntPack< Dim , DataSig > > * )NULL , coefficients , IsoValue.value , mesh , SetVertex , NonLinearFit.set , !NonManifold.set , PolygonMesh.set , FlipOrientation.set );
+#else // !__GNUC__ || __GNUC__ >=5
+		IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< Real >( IsotropicUIntPack< Dim , FEMSig >() , UIntPack< 0 >() , UIntPack< FEMTrivialSignature >() , *tree , ( typename FEMTree< Dim , Real >::template DensityEstimator< 0 >* )NULL , NULL , coefficients , IsoValue.value , mesh , SetVertex , NonLinearFit.set , !NonManifold.set , PolygonMesh.set , FlipOrientation.set );
+#endif // __GNUC__ && __GNUC__ < 5
+
+		if( Verbose.set ) printf( "Got iso-surface: %.2f(s)\n" , Time()-t );
+		if( Verbose.set ) printf( "Vertices / Polygons: %d / %d\n" , (int)( mesh.outOfCorePointCount()+mesh.inCorePoints.size() ) , (int)mesh.polygonCount() );
+
+		std::vector< std::string > comments;	// left empty: no comments are passed to the PLY writer
+		if( !PlyWritePolygons< Vertex , Real , Dim >( OutMesh.value , &mesh , ASCII.set ? PLY_ASCII : PLY_BINARY_NATIVE , comments , XForm< Real , Dim+1 >::Identity() ) )
+			ERROR_OUT( "Could not write mesh to: %s" , OutMesh.value );
+	}
+}
+
+
+template< unsigned int Dim , class Real >
+void Execute( FILE* fp , int degree , BoundaryType bType )	// Loads the tree from fp, then dispatches on (bType, degree) to the matching _Execute<> instantiation.
+{
+	FEMTree< Dim , Real > tree( fp , MEMORY_ALLOCATOR_BLOCK_SIZE );	// the tree is constructed from the already opened stream
+
+	if( Verbose.set ) printf( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
+
+	switch( bType )	// run-time boundary type and degree are mapped to compile-time signatures
+	{
+	case BOUNDARY_FREE:
+	{
+		switch( degree )
+		{
+			case 1: _Execute< Dim , Real , FEMDegreeAndBType< 1 , BOUNDARY_FREE >::Signature >( &tree , fp ) ; break;
+			case 2: _Execute< Dim , Real , FEMDegreeAndBType< 2 , BOUNDARY_FREE >::Signature >( &tree , fp ) ; break;
+			case 3: _Execute< Dim , Real , FEMDegreeAndBType< 3 , BOUNDARY_FREE >::Signature >( &tree , fp ) ; break;
+			case 4: _Execute< Dim , Real , FEMDegreeAndBType< 4 , BOUNDARY_FREE >::Signature >( &tree , fp ) ; break;
+			default: ERROR_OUT( "Only B-Splines of degree 1 - 4 are supported" );
+		}
+	}
+	break;
+	case BOUNDARY_NEUMANN:
+	{
+		switch( degree )
+		{
+			case 1: _Execute< Dim , Real , FEMDegreeAndBType< 1 , BOUNDARY_NEUMANN >::Signature >( &tree , fp ) ; break;
+			case 2: _Execute< Dim , Real , FEMDegreeAndBType< 2 , BOUNDARY_NEUMANN >::Signature >( &tree , fp ) ; break;
+			case 3: _Execute< Dim , Real , FEMDegreeAndBType< 3 , BOUNDARY_NEUMANN >::Signature >( &tree , fp ) ; break;
+			case 4: _Execute< Dim , Real , FEMDegreeAndBType< 4 , BOUNDARY_NEUMANN >::Signature >( &tree , fp ) ; break;
+			default: ERROR_OUT( "Only B-Splines of degree 1 - 4 are supported" );
+		}
+	}
+	break;
+	case BOUNDARY_DIRICHLET:
+	{
+		switch( degree )
+		{
+			case 1: _Execute< Dim , Real , FEMDegreeAndBType< 1 , BOUNDARY_DIRICHLET >::Signature >( &tree , fp ) ; break;
+			case 2: _Execute< Dim , Real , FEMDegreeAndBType< 2 , BOUNDARY_DIRICHLET >::Signature >( &tree , fp ) ; break;
+			case 3: _Execute< Dim , Real , FEMDegreeAndBType< 3 , BOUNDARY_DIRICHLET >::Signature >( &tree , fp ) ; break;
+			case 4: _Execute< Dim , Real , FEMDegreeAndBType< 4 , BOUNDARY_DIRICHLET >::Signature >( &tree , fp ) ; break;
+			default: ERROR_OUT( "Only B-Splines of degree 1 - 4 are supported" );
+		}
+	}
+	break;
+	default: ERROR_OUT( "Not a valid boundary type: %d" , bType );
+	}
+}
+
+int main( int argc , char* argv[] )	// Entry point: parses the options, reads the tree header from --in and dispatches on dimension and real type.
+{
+#ifdef ARRAY_DEBUG
+	WARN( "Array debugging enabled" );
+#endif // ARRAY_DEBUG
+	cmdLineParse( argc-1 , &argv[1] , params );	// argv[0] (the executable name) is skipped
+	omp_set_num_threads( Threads.value > 1 ? Threads.value : 1 );	// never request fewer than one thread
+	if( Verbose.set )
+	{
+		printf( "**************************************************\n" );
+		printf( "**************************************************\n" );
+		printf( "** Running Octree Visualization (Version %s) **\n" , VERSION );
+		printf( "**************************************************\n" );
+		printf( "**************************************************\n" );
+	}
+
+	if( !In.set )	// the input file is the only mandatory option
+	{
+		ShowUsage( argv[0] );
+		return EXIT_FAILURE;
+	}
+	FILE* fp = fopen( In.value , "rb" );
+	if( !fp ) ERROR_OUT( "Failed to open file for reading: %s" , In.value );
+	FEMTreeRealType realType ; int degree ; BoundaryType bType;
+	int dimension;
+	ReadFEMTreeParameter( fp , realType , dimension );	// fills in realType and dimension from the stream
+	{
+		unsigned int dim = dimension;
+		unsigned int* sigs = ReadDenseNodeDataSignatures( fp , dim );	// array of signatures, released with delete[] below
+		if( dimension!=dim ) ERROR_OUT( "Octree and node data dimensions don't match: %d != %d" , dimension , dim );
+		for( unsigned int d=1 ; d<dim ; d++ ) if( sigs[0]!=sigs[d] ) ERROR_OUT( "Anisotropic signatures" );	// every axis must share the first signature
+		degree = FEMSignatureDegree( sigs[0] );
+		bType = FEMSignatureBType( sigs[0] );
+		delete[] sigs;
+	}
+	if( Verbose.set ) printf( "%d-dimension , %s-precision , degree-%d , %s-boundary\n" , dimension , FEMTreeRealNames[ realType ] , degree , BoundaryNames[ bType ] );	// NOTE(review): both tables are indexed before realType / bType are validated below -- confirm
+
+	switch( dimension )	// only 2D and 3D trees are instantiated
+	{
+	case 2:
+		switch( realType )
+		{
+			case FEM_TREE_REAL_FLOAT:  Execute< 2 , float  >( fp , degree , bType ) ; break;
+			case FEM_TREE_REAL_DOUBLE: Execute< 2 , double >( fp , degree , bType ) ; break;
+			default: ERROR_OUT( "Unrecognized real type: %d" , realType );
+		}
+		break;
+	case 3:
+		switch( realType )
+		{
+			case FEM_TREE_REAL_FLOAT:  Execute< 3 , float  >( fp , degree , bType ) ; break;
+			case FEM_TREE_REAL_DOUBLE: Execute< 3 , double >( fp , degree , bType ) ; break;
+			default: ERROR_OUT( "Unrecognized real type: %d" , realType );
+		}
+		break;
+	default: ERROR_OUT( "Only dimensions 2 and 3 are supported" );
+	}
+
+	fclose( fp );
+	return EXIT_SUCCESS;
+}
diff --git a/Src/Allocator.h b/Src/Allocator.h
index 3ba23d9..61a350d 100644
--- a/Src/Allocator.h
+++ b/Src/Allocator.h
@@ -28,14 +28,9 @@ DAMAGE.
 
 #ifndef ALLOCATOR_INCLUDED
 #define ALLOCATOR_INCLUDED
-
 #include <vector>
 
-class AllocatorState
-{
-public:
-	int index , remains;
-};
+struct AllocatorState{ int index , remains; };
 /** This templated class assists in memory allocation and is well suited for instances
   * when it is known that the sequence of memory allocations is performed in a stack-based
   * manner, so that memory allocated last is released first. It also preallocates memory
@@ -44,30 +39,30 @@ class AllocatorState
   * The allocator is templated off of the class of objects that we would like it to allocate,
   * ensuring that appropriate constructors and destructors are called as necessary.
   */
-template<class T>
-class Allocator
+template< class T >
+class SingleThreadedAllocator
 {
 	int blockSize;
 	int index , remains;
 	std::vector< T* > memory;
 public:
-	Allocator( void ){ blockSize = index = remains = 0; }
-	~Allocator( void ){ reset(); }
+	SingleThreadedAllocator( void ){ blockSize = index = remains = 0; }
+	~SingleThreadedAllocator( void ){ reset(); }
 
 	/** This method is the allocators destructor. It frees up any of the memory that
 	  * it has allocated. */
 	void reset( void )
 	{
-		for( size_t i=0 ; i<memory.size() ; i++ ) delete[] memory[i];
+		for( size_t i=0;i<memory.size();i++ ) delete[] memory[i];
 		memory.clear();
-		blockSize = index = remains = 0;
+		blockSize=index=remains=0;
 	}
 	/** This method returns the memory state of the allocator. */
 	AllocatorState getState( void ) const
 	{
 		AllocatorState s;
-		s.index = index;
-		s.remains = remains;
+		s.index=index;
+		s.remains=remains;
 		return s;
 	}
 
@@ -117,7 +112,7 @@ class Allocator
 				remains=state.remains;
 			}
 			else{
-				for(int j=0;j<state.remains;j<remains){
+				for(int j=0;j<state.remains;j++){
 					memory[index][j].~T();
 					new(&memory[index][j]) T();
 				}
@@ -145,21 +140,38 @@ class Allocator
 	{
 		T* mem;
 		if( !elements ) return NULL;
-		if( elements>blockSize ) fprintf( stderr , "[ERROR] Allocator: elements bigger than block-size: %d>%d\n" , elements , blockSize ) , exit( 0 );
+		if( elements>blockSize ) ERROR_OUT( "elements bigger than block-size: %d>%d" , elements , blockSize );
 		if( remains<elements )
 		{
 			if( index==memory.size()-1 )
 			{
 				mem = new T[blockSize];
-				if( !mem ) fprintf( stderr , "[ERROR] Failed to allocate memory\n" ) , exit(0);
+				if( !mem ) ERROR_OUT( "Failed to allocate memory" );
 				memory.push_back( mem );
 			}
 			index++;
-			remains = blockSize;
+			remains=blockSize;
 		}
 		mem = &(memory[index][blockSize-remains]);
 		remains -= elements;
 		return mem;
 	}
 };
+template< class T >	// Keeps one SingleThreadedAllocator per OpenMP thread and serves each request from the pool indexed by omp_get_thread_num()
+class Allocator
+{
+	SingleThreadedAllocator< T >* _allocators;	// array of _maxThreads pools, owned by this object
+	int _maxThreads;							// omp_get_max_threads() at construction time
+	// The pools are owned through a raw pointer, so an implicit copy would delete[] them twice: forbid copying
+	Allocator( const Allocator& ) = delete;
+	Allocator& operator = ( const Allocator& ) = delete;
+public:
+	Allocator( void ){ _maxThreads = omp_get_max_threads() ; _allocators = new SingleThreadedAllocator< T >[_maxThreads]; }
+	~Allocator( void ){ delete[] _allocators; }
+	// Forwards the block size to every per-thread pool
+	void set( int blockSize ){ for( int t=0 ; t<_maxThreads ; t++ ) _allocators[t].set( blockSize ); }
+	// Returns memory for the requested number of elements from the calling thread's pool
+	T* newElements( int elements=1 ){ return _allocators[ omp_get_thread_num() ].newElements( elements ); }
+};
+
 #endif // ALLOCATOR_INCLUDE
diff --git a/Src/Array.h b/Src/Array.h
index 3933009..7e102d8 100644
--- a/Src/Array.h
+++ b/Src/Array.h
@@ -42,7 +42,7 @@ DAMAGE.
 #endif // _WIN64
 
 // Code from http://stackoverflow.com
-void* aligned_malloc( size_t size , size_t align )
+inline void* aligned_malloc( size_t size , size_t align )
 {
 	// Align enough for the data, the alignment padding, and room to store a pointer to the actual start of the memory
 	void*  mem = malloc( size + align + sizeof( void* ) );
@@ -54,10 +54,12 @@ void* aligned_malloc( size_t size , size_t align )
 	( ( void** ) amem )[-1] = mem;
 	return amem;
 }
-void aligned_free( void* mem ) { free( ( ( void** )mem )[-1] ); }
+inline void aligned_free( void* mem ) { free( ( ( void** )mem )[-1] ); }
 
 #ifdef ARRAY_DEBUG
+#ifdef SHOW_WARNINGS
 #pragma message ( "[WARNING] Array debugging is enabled" )
+#endif // SHOW_WARNINGS
 #include "Array.inl"
 #define      Pointer( ... )      Array< __VA_ARGS__ >
 #define ConstPointer( ... ) ConstArray< __VA_ARGS__ >
@@ -81,6 +83,8 @@ template< class C > ConstArray< C > GetPointer( const std::vector< C >& v ){ ret
 
 template< class C >      Array< C > GetPointer(       C* c , int sz ) { return      Array< C >::FromPointer( c , sz ); }
 template< class C > ConstArray< C > GetPointer( const C* c , int sz ) { return ConstArray< C >::FromPointer( c , sz ); }
+template< class C >      Array< C > GetPointer(       C* c , int start , int end ) { return      Array< C >::FromPointer( c , start , end ); }
+template< class C > ConstArray< C > GetPointer( const C* c , int start , int end ) { return ConstArray< C >::FromPointer( c , start , end ); }
 
 #else // !ARRAY_DEBUG
 #define      Pointer( ... )       __VA_ARGS__*
@@ -107,6 +111,7 @@ template< class C > const C* GetPointer( const std::vector< C >& v ){ return &v[
 
 template< class C >       C* GetPointer(       C* c , int sz ) { return c; }
 template< class C > const C* GetPointer( const C* c , int sz ) { return c; }
-
+template< class C >       C* GetPointer(       C* c , int start , int end ) { return c; }
+template< class C > const C* GetPointer( const C* c , int start , int end ) { return c; }
 #endif // ARRAY_DEBUG
 #endif // ARRAY_INCLUDED
diff --git a/Src/Array.inl b/Src/Array.inl
index 6247f52..265ebd0 100644
--- a/Src/Array.inl
+++ b/Src/Array.inl
@@ -27,6 +27,7 @@ DAMAGE.
 */
 #define FULL_ARRAY_DEBUG    0	// Note that this is not thread-safe
 
+#include <string.h>
 #include <stdio.h>
 #include <emmintrin.h>
 #include <vector>
@@ -66,14 +67,10 @@ static std::vector< DebugMemoryInfo > memoryInfo;
 template< class C >
 class Array
 {
+	template< class D > friend class Array;
 	void _assertBounds( long long idx ) const
 	{
-		if( idx<min || idx>=max )
-		{
-			fprintf( stderr , "Array index out-of-bounds: %lld <= %lld < %lld\n" , min , idx , max );
-			ASSERT( 0 );
-			exit( 0 );
-		}
+		if( idx<min || idx>=max ) ERROR_OUT( "Array index out-of-bounds: %lld <= %lld < %lld" , min , idx , max );
 	}
 protected:
 	C *data , *_data;
@@ -92,11 +89,7 @@ protected:
 		{
 			size_t idx;
 			for( idx=0 ; idx<memoryInfo.size( ) ; idx++ ) if( memoryInfo[idx].address==ptr ) break;
-			if( idx==memoryInfo.size() )
-			{
-				fprintf( stderr , "Could not find memory in address table\n" );
-				ASSERT( 0 );
-			}
+			if( idx==memoryInfo.size() ) ERROR_OUT( "Could not find memory in address table" );
 			else
 			{
 				memoryInfo[idx] = memoryInfo[memoryInfo.size()-1];
@@ -115,7 +108,9 @@ public:
 		Array a;
 		a._data = a.data = new C[size];
 		a.min = 0;
+#ifdef SHOW_WARNINGS
 #pragma message( "[WARNING] Casting unsigned to signed" )
+#endif // SHOW_WARNINGS
 		a.max = ( long long ) size;
 #if FULL_ARRAY_DEBUG
 		_AddMemoryInfo( a._data , name );
@@ -129,7 +124,9 @@ public:
 		if( clear ) memset( a.data ,  0 , size * sizeof( C ) );
 //		else        memset( a.data , -1 , size * sizeof( C ) );
 		a.min = 0;
+#ifdef SHOW_WARNINGS
 #pragma message( "[WARNING] Casting unsigned to signed" )
+#endif // SHOW_WARNINGS
 		a.max = ( long long ) size;
 #if FULL_ARRAY_DEBUG
 		_AddMemoryInfo( a._data , name );
@@ -144,7 +141,9 @@ public:
 		if( clear ) memset( a.data ,  0 , size * sizeof( C ) );
 //		else        memset( a.data , -1 , size * sizeof( C ) );
 		a.min = 0;
+#ifdef SHOW_WARNINGS
 #pragma message( "[WARNING] Casting unsigned to signed" )
+#endif // SHOW_WARNINGS
 		a.max = ( long long ) size;
 #if FULL_ARRAY_DEBUG
 		_AddMemoryInfo( a._data , name );
@@ -161,7 +160,9 @@ public:
 #endif // FULL_ARRAY_DEBUG
 		a._data = NULL;
 		_a.min = 0;
+#ifdef SHOW_WARNINGS
 #pragma message( "[WARNING] Casting unsigned to signed" )
+#endif // SHOW_WARNINGS
 		_a.max = ( long long ) size;
 #if FULL_ARRAY_DEBUG
 		_AddMemoryInfo( _a._data , name );
@@ -191,12 +192,7 @@ public:
 			data = (C*)a.data;
 			min = ( a.minimum() * szD ) / szC;
 			max = ( a.maximum() * szD ) / szC;
-			if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD )
-			{
-				fprintf( stderr , "Could not convert array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld\n" , a.minimum() , a.maximum() , szD , min , max , szC );
-				ASSERT( 0 );
-				exit( 0 );
-			}
+			if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD ) ERROR_OUT( "Could not convert array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld" , a.minimum() , a.maximum() , szD , min , max , szC );
 		}
 	}
 	static Array FromPointer( C* data , long long max )
@@ -350,14 +346,10 @@ public:
 template< class C >
 class ConstArray
 {
+	template< class D > friend class ConstArray;
 	void _assertBounds( long long idx ) const
 	{
-		if( idx<min || idx>=max )
-		{
-			fprintf( stderr , "ConstArray index out-of-bounds: %lld <= %lld < %lld\n" , min , idx , max );
-			ASSERT( 0 );
-			exit( 0 );
-		}
+		if( idx<min || idx>=max ) ERROR_OUT( "ConstArray index out-of-bounds: %lld <= %lld < %lld" , min , idx , max );
 	}
 protected:
 	const C *data;
@@ -387,13 +379,7 @@ public:
 		data = ( const C* )a.pointer( );
 		min = ( a.minimum() * szD ) / szC;
 		max = ( a.maximum() * szD ) / szC;
-		if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD )
-		{
-//			fprintf( stderr , "Could not convert const array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld\n" , a.minimum() , a.maximum() , szD , min , max , szC );
-			fprintf( stderr , "Could not convert const array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld\n %lld %lld %lld\n" , a.minimum() , a.maximum() , szD , min , max , szC , a.minimum() , a.minimum()*szD , (a.minimum()*szD)/szC );
-			ASSERT( 0 );
-			exit( 0 );
-		}
+		if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD ) ERROR_OUT( "Could not convert const array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld\n %lld %lld %lld" , a.minimum() , a.maximum() , szD , min , max , szC , a.minimum() , a.minimum()*szD , (a.minimum()*szD)/szC );
 	}
 	template< class D >
 	inline ConstArray( const ConstArray< D >& a )
@@ -404,12 +390,7 @@ public:
 		data = ( const C*)a.pointer( );
 		min = ( a.minimum() * szD ) / szC;
 		max = ( a.maximum() * szD ) / szC;
-		if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD )
-		{
-			fprintf( stderr , "Could not convert array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld\n" , a.minimum() , a.maximum() , szD , min , max , szC );
-			ASSERT( 0 );
-			exit( 0 );
-		}
+		if( min*szC!=a.minimum()*szD || max*szC!=a.maximum()*szD ) ERROR_OUT( "Could not convert array [ %lld , %lld ] * %lld => [ %lld , %lld ] * %lld" , a.minimum() , a.maximum() , szD , min , max , szC );
 	}
 	static ConstArray FromPointer( const C* data , long long max )
 	{
@@ -528,84 +509,44 @@ inline void PrintMemoryInfo( void ){ for( size_t i=0 ; i<memoryInfo.size() ; i++
 template< class C >
 Array< C > memcpy( Array< C > destination , const void* source , size_t size )
 {
-	if( size>destination.maximum()*sizeof(C) )
-	{
-		fprintf( stderr , "Size of copy exceeds destination maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>destination.maximum()*sizeof(C) ) ERROR_OUT( "Size of copy exceeds destination maximum: %lld > %lld" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
 	if( size ) memcpy( &destination[0] , source , size );
 	return destination;
 }
 template< class C , class D >
 Array< C > memcpy( Array< C > destination , Array< D > source , size_t size )
 {
-	if( size>destination.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of copy exceeds destination maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
-	if( size>source.maximum()*sizeof( D ) )
-	{
-		fprintf( stderr , "Size of copy exceeds source maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>destination.maximum()*sizeof( C ) ) ERROR_OUT( "Size of copy exceeds destination maximum: %lld > %lld" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
+	if( size>source.maximum()*sizeof( D ) ) ERROR_OUT( "Size of copy exceeds source maximum: %lld > %lld" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
 	if( size ) memcpy( &destination[0] , &source[0] , size );
 	return destination;
 }
 template< class C , class D >
 Array< C > memcpy( Array< C > destination , ConstArray< D > source , size_t size )
 {
-	if( size>destination.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of copy exceeds destination maximum: %lld > %lld\n" , ( long long )( size ) , ( long  long )( destination.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
-	if( size>source.maximum()*sizeof( D ) )
-	{
-		fprintf( stderr , "Size of copy exceeds source maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>destination.maximum()*sizeof( C ) ) ERROR_OUT( "Size of copy exceeds destination maximum: %lld > %lld" , ( long long )( size ) , ( long  long )( destination.maximum()*sizeof( C ) ) );
+	if( size>source.maximum()*sizeof( D ) ) ERROR_OUT( "Size of copy exceeds source maximum: %lld > %lld" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
 	if( size ) memcpy( &destination[0] , &source[0] , size );
 	return destination;
 }
 template< class D >
 void* memcpy( void* destination , Array< D > source , size_t size )
 {
-	if( size>source.maximum()*sizeof( D ) )
-	{
-		fprintf( stderr , "Size of copy exceeds source maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>source.maximum()*sizeof( D ) ) ERROR_OUT( "Size of copy exceeds source maximum: %lld > %lld" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
 	if( size ) memcpy( destination , &source[0] , size );
 	return destination;
 }
 template< class D >
 void* memcpy( void* destination , ConstArray< D > source , size_t size )
 {
-	if( size>source.maximum()*sizeof( D ) )
-	{
-		fprintf( stderr , "Size of copy exceeds source maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>source.maximum()*sizeof( D ) ) ERROR_OUT( "Size of copy exceeds source maximum: %lld > %lld" , ( long long )( size ) , ( long long )( source.maximum()*sizeof( D ) ) );
 	if( size ) memcpy( destination , &source[0] , size );
 	return destination;
 }
 template< class C >
 Array< C > memset( Array< C > destination , int value , size_t size )
 {
-	if( size>destination.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of set exceeds destination maximum: %lld > %lld\n" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( size>destination.maximum()*sizeof( C ) ) ERROR_OUT( "Size of set exceeds destination maximum: %lld > %lld" , ( long long )( size ) , ( long long )( destination.maximum()*sizeof( C ) ) );
 	if( size ) memset( &destination[0] , value , size );
 	return destination;
 }
@@ -613,50 +554,25 @@ Array< C > memset( Array< C > destination , int value , size_t size )
 template< class C >
 size_t fread( Array< C > destination , size_t eSize , size_t count , FILE* fp )
 {
-	if( count*eSize>destination.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of read exceeds source maximum: %lld > %lld\n" , ( long long )( count*eSize ) , ( long long )( destination.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( count*eSize>destination.maximum()*sizeof( C ) ) ERROR_OUT( "Size of read exceeds destination maximum: %lld > %lld" , ( long long )( count*eSize ) , ( long long )( destination.maximum()*sizeof( C ) ) );	// the limit checked is the capacity of the array being read into
 	return fread( &destination[0] , eSize , count , fp );
 }
 template< class C >
 size_t fwrite( Array< C > source , size_t eSize , size_t count , FILE* fp )
 {
-	if( count*eSize>source.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of write exceeds source maximum: %lld > %lld\n" , ( long long )( count*eSize ) , ( long long )( source.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( count*eSize>source.maximum()*sizeof( C ) ) ERROR_OUT( "Size of write exceeds source maximum: %lld > %lld" , ( long long )( count*eSize ) , ( long long )( source.maximum()*sizeof( C ) ) );
 	return fwrite( &source[0] , eSize , count , fp );
 }
 template< class C >
 size_t fwrite( ConstArray< C > source , size_t eSize , size_t count , FILE* fp )
 {
-	if( count*eSize>source.maximum()*sizeof( C ) )
-	{
-		fprintf( stderr , "Size of write exceeds source maximum: %lld > %lld\n" , ( long long )( count*eSize ) , ( long long )( source.maximum()*sizeof( C ) ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( count*eSize>source.maximum()*sizeof( C ) ) ERROR_OUT( "Size of write exceeds source maximum: %lld > %lld" , ( long long )( count*eSize ) , ( long long )( source.maximum()*sizeof( C ) ) );
 	return fwrite( &source[0] , eSize , count , fp );
 }
 template< class C >
 void qsort( Array< C > base , size_t numElements , size_t elementSize , int (*compareFunction)( const void* , const void* ) )
 {
-	if( sizeof(C)!=elementSize )
-	{
-		fprintf( stderr , "Element sizes differ: %lld != %lld\n" , ( long long )( sizeof(C) ) , ( long long )( elementSize ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
-	if( base.minimum()>0 || base.maximum()<numElements )
-	{
-		fprintf( stderr , "Array access out of bounds: %lld <= 0 <= %lld <= %lld\n" , base.minimum() , base.maximum() , ( long long )( numElements ) );
-		ASSERT( 0 );
-		exit( 0 );
-	}
+	if( sizeof(C)!=elementSize ) ERROR_OUT( "Element sizes differ: %lld != %lld" , ( long long )( sizeof(C) ) , ( long long )( elementSize ) );
+	if( base.minimum()>0 || base.maximum()<numElements ) ERROR_OUT( "Array access out of bounds: %lld <= 0 <= %lld <= %lld" , base.minimum() , base.maximum() , ( long long )( numElements ) );
 	qsort( base.pointer() , numElements , elementSize , compareFunction );
 }
diff --git a/Src/BMPStream.inl b/Src/BMPStream.inl
new file mode 100644
index 0000000..f544d8e
--- /dev/null
+++ b/Src/BMPStream.inl
@@ -0,0 +1,207 @@
+/*
+Copyright (c) 2010, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+#ifndef BMP_STREAM_INCLUDED
+#define BMP_STREAM_INCLUDED
+
+#include <stdio.h>
+#include "Array.h"
+
+/* constants for the biCompression field */
+#define BI_RGB        0L
+#define BI_RLE8       1L
+#define BI_RLE4       2L
+#define BI_BITFIELDS  3L
+
+/* Some magic numbers */
+
+#define BMP_BF_TYPE 0x4D42
+/* word BM */
+
+#define BMP_BF_OFF_BITS 54
+/* 14 for file header + 40 for info header (not sizeof(), but packed size) */
+
+#define BMP_BI_SIZE 40
+/* packed size of info header */
+
+#ifndef _WIN32
+#pragma pack( push , 2 )
+typedef struct tagBITMAPFILEHEADER {	// read/written as raw bytes below, so it must be packed to the 14-byte file layout (unpacked it is typically padded to 16 bytes)
+    unsigned short int bfType;
+    unsigned int bfSize;
+    unsigned short int bfReserved1;
+    unsigned short int bfReserved2;
+    unsigned int bfOffBits;
+} BITMAPFILEHEADER;
+typedef struct tagBITMAPINFOHEADER {	// 40 bytes (BMP_BI_SIZE) with or without packing
+    unsigned int biSize;
+    int biWidth;
+    int biHeight;
+    unsigned short int biPlanes;
+    unsigned short int biBitCount;
+    unsigned int biCompression;
+    unsigned int biSizeImage;
+    int biXPelsPerMeter;
+    int biYPelsPerMeter;
+    unsigned int biClrUsed;
+    unsigned int biClrImportant;
+} BITMAPINFOHEADER;
+#pragma pack( pop )
+#endif // !_WIN32
+
+
+struct BMPInfo	// Per-file state created by BMPInitRead / BMPInitWrite and handed to the row and finalize functions as a void*
+{
+	FILE* fp;						// the open bitmap file
+	Pointer( unsigned char ) data;	// scratch buffer of lineLength bytes holding one row
+	int width , lineLength;			// pixels per row , bytes per row (width*3 rounded up to a multiple of 4)
+};
+inline void BMPGetImageInfo( char* fileName , int& width , int& height , int& channels , int& bytesPerChannel )
+{
+    BITMAPFILEHEADER bmfh;
+    BITMAPINFOHEADER bmih;
+	// Reads only the two headers of fileName and reports the image geometry; anything but an uncompressed 24-bit bitmap is rejected
+	FILE* fp = fopen( fileName , "rb" );
+	if( !fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+	// A short read would leave the headers (partially) uninitialized, so report it instead of validating garbage
+	if( fread( &bmfh , sizeof( BITMAPFILEHEADER ) , 1 , fp )!=1 ){ fclose(fp) ; ERROR_OUT( "Failed to read bitmap file header: %s" , fileName ); }
+	if( fread( &bmih , sizeof( BITMAPINFOHEADER ) , 1 , fp )!=1 ){ fclose(fp) ; ERROR_OUT( "Failed to read bitmap info header: %s" , fileName ); }
+
+	if( bmfh.bfType!=BMP_BF_TYPE || bmfh.bfOffBits!=BMP_BF_OFF_BITS ){ fclose(fp) ; ERROR_OUT( "Bad bitmap file header" ); }
+	if( bmih.biSize!=BMP_BI_SIZE || bmih.biWidth<=0 || bmih.biHeight<=0 || bmih.biPlanes!=1 || bmih.biBitCount!=24 || bmih.biCompression!=BI_RGB ) { fclose(fp) ; ERROR_OUT( "Bad bitmap file info" ); }
+	width           = bmih.biWidth;
+	height          = bmih.biHeight;
+	channels        = 3;
+	bytesPerChannel = 1;
+	int lineLength = width * channels;
+	if( lineLength % 4 ) lineLength = (lineLength / 4 + 1) * 4;	// rows are rounded up to a multiple of 4 bytes
+	if( bmih.biSizeImage!=lineLength*height ){ fclose(fp) ; ERROR_OUT( "Bad bitmap image size" ); }	// the file is closed exactly once on this path
+	fclose( fp );
+}
+
+inline void* BMPInitRead( char* fileName , int& width , int& height )
+{
+    BITMAPFILEHEADER bmfh;
+    BITMAPINFOHEADER bmih;
+	// Opens fileName, validates its headers, sets width/height and returns a BMPInfo* (as void*) positioned just past the pixel data
+	BMPInfo* info = (BMPInfo*)malloc( sizeof( BMPInfo ) );
+	if( !info ) ERROR_OUT( "Could not allocate memory for bitmap info" );
+	info->fp = fopen( fileName , "rb" );
+	if( !info->fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+	if( fread( &bmfh , sizeof( BITMAPFILEHEADER ) , 1 , info->fp )!=1 ) ERROR_OUT( "Failed to read bitmap file header: %s" , fileName );
+	if( fread( &bmih , sizeof( BITMAPINFOHEADER ) , 1 , info->fp )!=1 ) ERROR_OUT( "Failed to read bitmap info header: %s" , fileName );
+
+	if( bmfh.bfType!=BMP_BF_TYPE || bmfh.bfOffBits!=BMP_BF_OFF_BITS ) ERROR_OUT( "Bad bitmap file header" );
+	if( bmih.biSize!=BMP_BI_SIZE || bmih.biWidth<=0 || bmih.biHeight<=0 || bmih.biPlanes!=1 || bmih.biBitCount!=24 || bmih.biCompression!=BI_RGB ) ERROR_OUT( "Bad bitmap file info" );
+
+	info->width = width = bmih.biWidth;
+	height = bmih.biHeight;
+	info->lineLength = width * 3;
+	if( info->lineLength % 4 ) info->lineLength = (info->lineLength / 4 + 1) * 4;	// rows are rounded up to a multiple of 4 bytes
+	if( bmih.biSizeImage!=info->lineLength*height ) ERROR_OUT( "Bad bitmap image size" );
+	info->data = AllocPointer< unsigned char >( info->lineLength );
+	if( !info->data ) ERROR_OUT( "Could not allocate memory for bitmap data" );
+	// Seek to the end of the pixel data: BMPReadRow steps backwards one row per call
+	fseek( info->fp , (long) bmfh.bfOffBits , SEEK_SET );
+	fseek( info->fp , (long) info->lineLength * height , SEEK_CUR );
+	return info;
+}
+template< int Channels , bool HDR >
+inline void* BMPInitWrite( char* fileName , int width , int height , int quality )
+{
+	if( HDR ) WARN( "No HDR support for BMP" );
+	BITMAPFILEHEADER bmfh;
+	BITMAPINFOHEADER bmih;
+	// Creates fileName, writes both headers for a 24-bit uncompressed image and returns a BMPInfo* (as void*); quality is not used
+	BMPInfo* info = (BMPInfo*)malloc( sizeof( BMPInfo ) );
+	info->fp = fopen( fileName , "wb" );
+	if( !info->fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+	info->width = width;
+
+	info->lineLength = width * 3;	/* RGB */
+	if( info->lineLength % 4 ) info->lineLength = (info->lineLength / 4 + 1) * 4;
+	info->data = AllocPointer< unsigned char >( info->lineLength );
+	if( !info->data ) ERROR_OUT( "Could not allocate memory for bitmap data" );
+	/* Write file header */
+
+	bmfh.bfType = BMP_BF_TYPE;
+	bmfh.bfSize = BMP_BF_OFF_BITS + info->lineLength * height;
+	bmfh.bfReserved1 = 0;
+	bmfh.bfReserved2 = 0;
+	bmfh.bfOffBits = BMP_BF_OFF_BITS;
+
+	fwrite( &bmfh , sizeof(BITMAPFILEHEADER) , 1 , info->fp );
+	/* Write info header */
+	bmih.biSize = BMP_BI_SIZE;
+	bmih.biWidth = width;
+	bmih.biHeight = -height;		/* negative height: per the BMP format the rows are then stored top-down */
+	bmih.biPlanes = 1;
+	bmih.biBitCount = 24;			/* RGB */
+	bmih.biCompression = BI_RGB;	/* RGB */
+	bmih.biSizeImage = info->lineLength * (unsigned int) height;	/* use the positive height: casting the negated biHeight to unsigned wraps to a huge value */
+	bmih.biXPelsPerMeter = 2925;
+	bmih.biYPelsPerMeter = 2925;
+	bmih.biClrUsed = 0;
+	bmih.biClrImportant = 0;
+
+	fwrite( &bmih , sizeof(BITMAPINFOHEADER) , 1 , info->fp );
+
+	return info;
+}
+template< int Channels , class ChannelType >	// Appends one row of pixels to the file behind v (the BMPInfo* returned by BMPInitWrite); j is not used
+inline void BMPWriteRow( Pointer( ChannelType ) pixels , void* v , int j )
+{
+	BMPInfo* info = (BMPInfo*)v;
+	ConvertRow< ChannelType , unsigned char >( pixels , info->data , info->width , Channels , 3 );	// presumably converts the row into 3 bytes per pixel in info->data -- ConvertRow is defined elsewhere
+	for( int i=0 ; i<info->width ; i++ ) { unsigned char temp = info->data[i*3] ; info->data[i*3] = info->data[i*3+2] ; info->data[i*3+2] = temp; }	// swap the first and third byte of every pixel (presumably RGB -> BGR)
+	fwrite( info->data , sizeof(unsigned char) , info->width*3 , info->fp );
+	int nbytes = info->width*3;
+	while( nbytes % 4 ) putc( 0 , info->fp ) , nbytes++;	// zero-pad the row to a multiple of 4 bytes
+}
+template< int Channels , class ChannelType >	// Reads one row from the file behind v (the BMPInfo* returned by BMPInitRead) into pixels; j is not used
+void BMPReadRow( Pointer( ChannelType ) pixels , void* v , int j )
+{
+	BMPInfo* info = (BMPInfo*)v;
+	// Step back one row, read it, then step back again: successive calls walk the file from its last stored row towards its first
+	fseek( info->fp , -info->lineLength , SEEK_CUR );
+    fread( info->data , 1 , info->lineLength , info->fp );
+	fseek( info->fp , -info->lineLength , SEEK_CUR );
+	if( ferror(info->fp) ) ERROR_OUT( "Error reading bitmap row" );
+	for( int i=0 ; i<info->width ; i++ ) { unsigned char temp = info->data[i*3] ; info->data[i*3] = info->data[i*3+2] ; info->data[i*3+2] = temp; }	// swap the first and third byte of every pixel (presumably BGR -> RGB)
+	ConvertRow< unsigned char , ChannelType >( ( ConstPointer( unsigned char ) )info->data , pixels , info->width , 3 , Channels );	// presumably converts the 3-byte pixels into the caller's format -- ConvertRow is defined elsewhere
+}
+inline void BMPFinalize( void* v )	// Releases everything held by the BMPInfo behind v: the file, the row buffer and the struct itself
+{
+	BMPInfo* info = (BMPInfo*)v;
+	fclose( info->fp );
+	FreePointer( info->data );
+	free( info );	// the struct was obtained with malloc in BMPInitRead / BMPInitWrite
+}
+
+inline void BMPFinalizeWrite( void* v ){ BMPFinalize( v ); }	// writer-side name for BMPFinalize
+inline void BMPFinalizeRead ( void* v ){ BMPFinalize( v ); }	// reader-side name for BMPFinalize
+#endif // BMP_STREAM_INCLUDED
\ No newline at end of file
diff --git a/Src/BSplineData.h b/Src/BSplineData.h
index 601253e..9f791d0 100644
--- a/Src/BSplineData.h
+++ b/Src/BSplineData.h
@@ -29,7 +29,7 @@ DAMAGE.
 #ifndef BSPLINE_DATA_INCLUDED
 #define BSPLINE_DATA_INCLUDED
 
-#define NEW_BSPLINE_CODE
+#include <string.h>
 
 #include "BinaryNode.h"
 #include "PPolynomial.h"
@@ -44,11 +44,37 @@ enum BoundaryType
 };
 const char* BoundaryNames[] = { "free" , "Dirichlet" , "Neumann" };
 template< BoundaryType BType > inline bool HasPartitionOfUnity( void ){ return BType!=BOUNDARY_DIRICHLET; }
+inline bool HasPartitionOfUnity( BoundaryType bType ){ return bType!=BOUNDARY_DIRICHLET; }
+template< BoundaryType BType , unsigned int D > struct DerivativeBoundary{};
+template< unsigned int D > struct DerivativeBoundary< BOUNDARY_FREE      , D >{ static const BoundaryType BType = BOUNDARY_FREE; };
+template< unsigned int D > struct DerivativeBoundary< BOUNDARY_DIRICHLET , D >{ static const BoundaryType BType = DerivativeBoundary< BOUNDARY_NEUMANN   , D-1 >::BType; };
+template< unsigned int D > struct DerivativeBoundary< BOUNDARY_NEUMANN   , D >{ static const BoundaryType BType = DerivativeBoundary< BOUNDARY_DIRICHLET , D-1 >::BType; };
+template< > struct DerivativeBoundary< BOUNDARY_FREE      , 0 >{ static const BoundaryType BType = BOUNDARY_FREE; };
+template< > struct DerivativeBoundary< BOUNDARY_DIRICHLET , 0 >{ static const BoundaryType BType = BOUNDARY_DIRICHLET; };
+template< > struct DerivativeBoundary< BOUNDARY_NEUMANN   , 0 >{ static const BoundaryType BType = BOUNDARY_NEUMANN; };
 
-// This class represents a function that is a linear combination of B-spline elements.
-// The coeff member indicating how much of each element is present.
+
+// Generate a single signature that combines the degree and the boundary type
+template< unsigned int Degree , BoundaryType BType=BOUNDARY_FREE > struct FEMDegreeAndBType { static const unsigned int Signature =  Degree * BOUNDARY_COUNT + BType; };
+
+// Extract the degree and boundary type from the signature
+template< unsigned int Signature > struct FEMSignature
+{
+	static const unsigned int Degree = ( Signature / BOUNDARY_COUNT );
+	static const BoundaryType BType = (BoundaryType)( Signature % BOUNDARY_COUNT );
+	template< unsigned int D=1 >
+	static constexpr typename std::enable_if< (Degree>=D) , unsigned int >::type DSignature( void ){ return FEMDegreeAndBType< Degree-D , DerivativeBoundary< BType , D >::BType >::Signature; }
+};
+
+inline unsigned int FEMSignatureDegree( unsigned int signature ){ return signature / BOUNDARY_COUNT; }	// run-time counterpart of FEMSignature::Degree; inline because this header may be included by several translation units
+inline BoundaryType FEMSignatureBType ( unsigned int signature ){ return (BoundaryType)( signature % BOUNDARY_COUNT ); }	// run-time counterpart of FEMSignature::BType
+
+static const unsigned int FEMTrivialSignature = FEMDegreeAndBType< 0 , BOUNDARY_FREE >::Signature;
+
+// This class represents a function that is a linear combination of B-spline elements,
+// with the coeff member indicating how much of each element is present.
 // [WARNING] The ordering of B-spline elements is in the opposite order from that returned by Polynomial::BSplineComponent
-template< int Degree >
+template< unsigned int Degree >
 struct BSplineElementCoefficients
 {
 	int coeffs[Degree+1];
@@ -61,7 +87,7 @@ struct BSplineElementCoefficients
 // On each block, the function is a degree-Degree polynomial, represented by the coefficients
 // in the associated BSplineElementCoefficients.
 // [NOTE] This representation of a function is agnostic to the type of boundary conditions (though the constructor is not).
-template< int Degree >
+template< unsigned int Degree >
 struct BSplineElements : public std::vector< BSplineElementCoefficients< Degree > >
 {
 	static const bool _Primal = (Degree&1)==1;
@@ -110,12 +136,14 @@ struct BSplineElements : public std::vector< BSplineElementCoefficients< Degree
 		return P.compress(0);
 	}
 };
-template< int Degree , int DDegree > struct Differentiator                   { static void Differentiate( const BSplineElements< Degree >& bse , BSplineElements< DDegree >& dbse ); };
-template< int Degree >               struct Differentiator< Degree , Degree >{ static void Differentiate( const BSplineElements< Degree >& bse , BSplineElements<  Degree >& dbse ); };
+
+template< unsigned int Degree , unsigned int DDegree > struct Differentiator                   { static void Differentiate( const BSplineElements< Degree >& bse , BSplineElements< DDegree >& dbse ); };
+template< unsigned int Degree >                        struct Differentiator< Degree , Degree >{ static void Differentiate( const BSplineElements< Degree >& bse , BSplineElements<  Degree >& dbse ); };
+
 #define BSPLINE_SET_BOUNDS( name , s , e ) \
 	static const int name ## Start = (s); \
 	static const int name ## End   = (e); \
-	static const int name ## Size  = (e)-(s)+1
+	static const unsigned int name ## Size  = (e)-(s)+1
 
 // Assumes that x is non-negative
 #define _FLOOR_OF_HALF( x ) (   (x)    >>1 )
@@ -128,13 +156,17 @@ template< int Degree >               struct Differentiator< Degree , Degree >{ s
 #define SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF( x ) (  CEIL_OF_HALF( x ) )
 #define LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF( x ) ( FLOOR_OF_HALF( x ) )
 
-template< int Degree >
+template< unsigned int Degree >
 struct BSplineSupportSizes
 {
+protected:
+	static const int _Degree = Degree;
+public:
 	inline static int Nodes( int depth ){ return ( 1<<depth ) + ( Degree&1 ); }
 	inline static bool OutOfBounds( int depth , int offset ){ return offset>=0 || offset<Nodes(depth); }
 	// An index is interiorly supported if its support is in the range [0,1<<depth)
 	inline static void InteriorSupportedSpan( int depth , int& begin , int& end ){ begin = -SupportStart , end = (1<<depth)-SupportEnd; }
+	inline static bool IsInteriorlySupported( int depth , int offset ){ return offset+SupportStart>=0 && offset+SupportEnd<(1<<depth); }
 
 	// If the degree is even, we use a dual basis and functions are centered at the center of the interval
 	// It the degree is odd, we use a primal basis and functions are centered at the left end of the interval
@@ -142,10 +174,12 @@ struct BSplineSupportSizes
 	//	Support( I ) = [ I - (Degree+1-Inset)/2 , I + (Degree+1+Inset)/2 ]
 	// [NOTE] The value of ( Degree + 1 +/- Inset ) is always even
 	static const int Inset = (Degree&1) ? 0 : 1;
-	BSPLINE_SET_BOUNDS(      Support , -( (Degree+1)/2 ) , Degree/2           );
-	BSPLINE_SET_BOUNDS( ChildSupport ,    2*SupportStart , 2*(SupportEnd+1)-1 );
-	BSPLINE_SET_BOUNDS(       Corner ,    SupportStart+1 , SupportEnd         );
-	BSPLINE_SET_BOUNDS(  ChildCorner ,  2*SupportStart+1 , 2*SupportEnd + 1   );
+	BSPLINE_SET_BOUNDS(      Support , -( (_Degree+1)/2 ) , _Degree/2          );
+	BSPLINE_SET_BOUNDS( ChildSupport ,     2*SupportStart , 2*(SupportEnd+1)-1 );
+	BSPLINE_SET_BOUNDS(       Corner ,     SupportStart+1 , SupportEnd         );
+	BSPLINE_SET_BOUNDS(  ChildCorner ,   2*SupportStart+1 , 2*SupportEnd + 1   );
+	BSPLINE_SET_BOUNDS(      BCorner ,      CornerStart-1 ,      CornerEnd+1 );
+	BSPLINE_SET_BOUNDS( ChildBCorner , ChildCornerStart-1 , ChildCornerEnd+1 );
 
 	// Setting I=0, we are looking for the smallest/largest integers J such that:
 	//		Support( 0 ) CONTAINS Support( J )
@@ -153,7 +187,7 @@ struct BSplineSupportSizes
 	// Which is the same as the smallest/largest integers J such that:
 	//		J - (Degree+1-Inset)/2 >= -(Degree+1-Inset)	| J + (Degree+1+Inset)/2 <= (Degree+1+Inset)
 	// <=>	J >= -(Degree+1-Inset)/2					| J <= (Degree+1+Inset)/2
-	BSPLINE_SET_BOUNDS( UpSample , - ( Degree + 1 - Inset ) / 2 , ( Degree + 1 + Inset ) /2 );
+	BSPLINE_SET_BOUNDS( UpSample , - ( _Degree + 1 - Inset ) / 2 , ( _Degree + 1 + Inset ) /2 );
 
 	// Setting I=0/1, we are looking for the smallest/largest integers J such that:
 	//		Support( J ) CONTAINS Support( 0/1 )
@@ -161,19 +195,22 @@ struct BSplineSupportSizes
 	// Which is the same as the smallest/largest integers J such that:
 	//		2*J + (Degree+1+Inset) >= 0/1 + (Degree+1+Inset)/2	| 2*J - (Degree+1-Inset) <= 0/1 - (Degree+1-Inset)/2
 	// <=>	2*J >= 0/1 - (Degree+1+Inset)/2						| 2*J <= 0/1 + (Degree+1-Inset)/2
-	BSPLINE_SET_BOUNDS( DownSample0 , SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF( 0 - ( Degree + 1 + Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF( 0 + ( Degree + 1 - Inset ) / 2 ) );
-	BSPLINE_SET_BOUNDS( DownSample1 , SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF( 1 - ( Degree + 1 + Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF( 1 + ( Degree + 1 - Inset ) / 2 ) );
-	static const int DownSampleStart[] , DownSampleEnd[] , DownSampleSize[];
+	BSPLINE_SET_BOUNDS( DownSample0 , SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF( 0 - ( _Degree + 1 + Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF( 0 + ( _Degree + 1 - Inset ) / 2 ) );
+	BSPLINE_SET_BOUNDS( DownSample1 , SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF( 1 - ( _Degree + 1 + Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF( 1 + ( _Degree + 1 - Inset ) / 2 ) );
+	static const int DownSampleStart[] , DownSampleEnd[];
+	static const unsigned int DownSampleSize[];
 };
-template< int Degree > const int BSplineSupportSizes< Degree >::DownSampleStart[] = { DownSample0Start , DownSample1Start };
-template< int Degree > const int BSplineSupportSizes< Degree >::DownSampleEnd  [] = { DownSample0End   , DownSample1End   };
-template< int Degree > const int BSplineSupportSizes< Degree >::DownSampleSize [] = { DownSample0Size  , DownSample1Size  };
-
+template< unsigned int Degree > const int BSplineSupportSizes< Degree >::DownSampleStart[] = { DownSample0Start , DownSample1Start };
+template< unsigned int Degree > const int BSplineSupportSizes< Degree >::DownSampleEnd  [] = { DownSample0End   , DownSample1End   };
+template< unsigned int Degree > const unsigned int BSplineSupportSizes< Degree >::DownSampleSize [] = { DownSample0Size  , DownSample1Size  };
 
-// Given a B-Spline of degree Degree1 at position i, this gives the offsets of the B-splines of degree Degree2 that just overlap with it.
-template< int Degree1 , int Degree2 >
+template< unsigned int Degree1 , unsigned int Degree2=Degree1 >
 struct BSplineOverlapSizes
 {
+protected:
+	static const int _Degree1 = Degree1;
+	static const int _Degree2 = Degree2;
+public:
 	typedef BSplineSupportSizes< Degree1 > EData1;
 	typedef BSplineSupportSizes< Degree2 > EData2;
 	BSPLINE_SET_BOUNDS(             Overlap , EData1::     SupportStart - EData2::SupportEnd , EData1::     SupportEnd - EData2::SupportStart );
@@ -187,19 +224,42 @@ struct BSplineOverlapSizes
 	// Which is the same as the smallest/largest integers J such that:
 	//		0/1 - (Degree1+1-Inset1)/2 < 2*J + (Degree2+1+Inset2)			| 0/1 + (Degree1+1+Inset1)/2 > 2*J - (Degree2+1-Inset2)	
 	// <=>	2*J > 0/1 - ( 2*Degree2 + Degree1 + 3 + 2*Inset2 - Inset1 ) / 2	| 2*J < 0/1 + ( 2*Degree2 + Degree1 + 3 - 2*Inset2 + Inset1 ) / 2
-	BSPLINE_SET_BOUNDS( ParentOverlap0 , SMALLEST_INTEGER_LARGER_THAN_HALF( 0 - ( 2*Degree2 + Degree1 + 3 + 2*EData2::Inset - EData1::Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_HALF( 0 + ( 2*Degree2 + Degree1 + 3 - 2*EData2::Inset + EData1::Inset ) / 2 ) );
-	BSPLINE_SET_BOUNDS( ParentOverlap1 , SMALLEST_INTEGER_LARGER_THAN_HALF( 1 - ( 2*Degree2 + Degree1 + 3 + 2*EData2::Inset - EData1::Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_HALF( 1 + ( 2*Degree2 + Degree1 + 3 - 2*EData2::Inset + EData1::Inset ) / 2 ) );
+	BSPLINE_SET_BOUNDS( ParentOverlap0 , SMALLEST_INTEGER_LARGER_THAN_HALF( 0 - ( 2*_Degree2 + _Degree1 + 3 + 2*EData2::Inset - EData1::Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_HALF( 0 + ( 2*_Degree2 + _Degree1 + 3 - 2*EData2::Inset + EData1::Inset ) / 2 ) );
+	BSPLINE_SET_BOUNDS( ParentOverlap1 , SMALLEST_INTEGER_LARGER_THAN_HALF( 1 - ( 2*_Degree2 + _Degree1 + 3 + 2*EData2::Inset - EData1::Inset ) / 2 ) , LARGEST_INTEGER_SMALLER_THAN_HALF( 1 + ( 2*_Degree2 + _Degree1 + 3 - 2*EData2::Inset + EData1::Inset ) / 2 ) );
 	static const int ParentOverlapStart[] , ParentOverlapEnd[] , ParentOverlapSize[];
 };
-template< int Degree1 , int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[] = { ParentOverlap0Start , ParentOverlap1Start };
-template< int Degree1 , int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [] = { ParentOverlap0End   , ParentOverlap1End   };
-template< int Degree1 , int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapSize [] = { ParentOverlap0Size  , ParentOverlap1Size  };
+template< unsigned int Degree1 , unsigned int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[] = { ParentOverlap0Start , ParentOverlap1Start };
+template< unsigned int Degree1 , unsigned int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [] = { ParentOverlap0End   , ParentOverlap1End   };
+template< unsigned int Degree1 , unsigned int Degree2 > const int BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapSize [] = { ParentOverlap0Size  , ParentOverlap1Size  };
 
-template< int Degree , BoundaryType BType >
+struct EvaluationData // Groups the abstract evaluator interfaces that the nested evaluators of BSplineEvaluationData derive from
+{
+	struct CornerEvaluator // Base of BSplineEvaluationData::CornerEvaluator< D >::Evaluator and ::ChildEvaluator
+	{
+		virtual double value( int fIdx , int cIdx , int d ) const = 0; // pure virtual lookup; d replaces the former bool derivative flag
+		virtual void set( int depth ) = 0; // pure virtual; the implementations in this file forward to the matching BSplineEvaluationData::Set*Evaluator()
+		virtual ~CornerEvaluator( void ){} // virtual destructor for use through the interface
+	};
+	struct CenterEvaluator // Base of BSplineEvaluationData::CenterEvaluator< D >::Evaluator and ::ChildEvaluator
+	{
+		virtual double value( int fIdx , int cIdx , int d ) const = 0; // pure virtual lookup; the implementations use d as the first index of their value table
+		virtual void set( int depth ) = 0; // pure virtual; the implementations in this file forward to the matching BSplineEvaluationData::Set*Evaluator()
+		virtual ~CenterEvaluator( void ){} // virtual destructor for use through the interface
+	};
+	struct UpSampleEvaluator // Base of BSplineEvaluationData::UpSampleEvaluator
+	{
+		virtual double value( int pIdx , int cIdx ) const = 0; // pure virtual lookup; no derivative index in this interface
+		virtual void set( int depth ) = 0; // pure virtual; the implementation forwards to BSplineEvaluationData::SetUpSampleEvaluator()
+		virtual ~UpSampleEvaluator( void ){} // virtual destructor for use through the interface
+	};
+};
+
+template< unsigned int FEMSig >
 class BSplineEvaluationData
 {
 public:
-	static const int Pad = (BType==BOUNDARY_FREE ) ? BSplineSupportSizes< Degree >::SupportEnd : ( (Degree&1) && BType==BOUNDARY_DIRICHLET ) ? -1 : 0;
+	static const unsigned int Degree = FEMSignature< FEMSig >::Degree;
+	static const int Pad = (FEMSignature< FEMSig >::BType==BOUNDARY_FREE ) ? BSplineSupportSizes< Degree >::SupportEnd : ( (Degree&1) && FEMSignature< FEMSig >::BType==BOUNDARY_DIRICHLET ) ? -1 : 0;
 	inline static int Begin( int depth ){ return -Pad; }
 	inline static int End  ( int depth ){ return (1<<depth) + (Degree&1) + Pad; }
 	inline static bool OutOfBounds( int depth , int offset ){ return offset<Begin(depth) || offset>=End(depth); }
@@ -215,22 +275,9 @@ class BSplineEvaluationData
 	static inline int IndexToOffset( int depth , int idx ){ return ( idx-Pad<=OffsetStart ? idx - Pad : ( BSplineSupportSizes< Degree >::Nodes(depth) + Pad - IndexSize + idx ) ); }
 
 	BSplineEvaluationData( void );
+	static double Value( int depth , int off , double s , int d );
+	static double Integral( int depth , int off , double b , double e , int d );
 
-	// [NOTE] The offset represents the node position, not the index of the function
-	static double Value( int depth , int off , double s , bool derivative );
-
-	// Note that this struct stores the components in left-to-right order
-	struct BSplineComponents
-	{
-	protected:
-		Polynomial< Degree > _polys[Degree+1];
-	public:
-		BSplineComponents( void ){ ; }
-		BSplineComponents( int depth , int offset );
-		const Polynomial< Degree >& operator[] ( int idx ) const { return _polys[idx]; }
-		BSplineComponents derivative( void ) const;
-		void printnl( void ) const { for( int d=0 ; d<=Degree ; d++ ) printf( "[%d] " , d ) , _polys[d].printnl(); }
-	};
 	struct BSplineUpSamplingCoefficients
 	{
 	protected:
@@ -241,119 +288,121 @@ class BSplineEvaluationData
 		double operator[] ( int idx ){ return (double)_coefficients[idx] / (1<<Degree); }
 	};
 
+	template< unsigned int D > // D+1 is the size of the first (derivative) dimension of the value tables below
 	struct CenterEvaluator
 	{
-		struct Evaluator
+		struct Evaluator : public EvaluationData::CenterEvaluator // table populated by SetCenterEvaluator< D >()
 		{
 		protected:
 			friend BSplineEvaluationData;
 			int _depth;
-			double _ccValues[2][IndexSize][BSplineSupportSizes< Degree >::SupportSize];
+			double _ccValues[D+1][IndexSize][BSplineSupportSizes< Degree >::SupportSize]; // indexed as [d][OffsetToIndex(depth,fIdx)][(cIdx-fIdx)-SupportStart] by value()
 		public:
-#ifdef BRUNO_LEVY_FIX
 			Evaluator( void ){ _depth = 0 ; memset( _ccValues , 0 , sizeof(_ccValues) ); }
-#endif // BRUNO_LEVY_FIX
-			double value( int fIdx , int cIdx , bool d ) const;
+			double value( int fIdx , int cIdx , int d ) const; // d is used directly as the first index of _ccValues, so it must lie in [0,D]
 			int depth( void ) const { return _depth; }
+			void set( int depth ){ BSplineEvaluationData< FEMSig >::template SetCenterEvaluator< D >( *this , depth ); } // implements EvaluationData::CenterEvaluator::set()
 		};
-		struct ChildEvaluator
+		struct ChildEvaluator : public EvaluationData::CenterEvaluator // table populated by SetChildCenterEvaluator< D >()
 		{
 		protected:
 			friend BSplineEvaluationData;
 			int _parentDepth;
-			double _pcValues[2][IndexSize][BSplineSupportSizes< Degree >::ChildSupportSize];
+			double _pcValues[D+1][IndexSize][BSplineSupportSizes< Degree >::ChildSupportSize]; // indexed as [d][OffsetToIndex(parentDepth,fIdx)][(cIdx-2*fIdx)-ChildSupportStart] by value()
 		public:
-#ifdef BRUNO_LEVY_FIX
 			ChildEvaluator( void ){ _parentDepth = 0 ; memset( _pcValues , 0 , sizeof(_pcValues) ); }
-#endif // BRUNO_LEVY_FIX
-			double value( int fIdx , int cIdx , bool d ) const;
+			double value( int fIdx , int cIdx , int d ) const; // d is used directly as the first index of _pcValues, so it must lie in [0,D]
 			int parentDepth( void ) const { return _parentDepth; }
 			int childDepth( void ) const { return _parentDepth+1; }
+			void set( int parentDepth ){ BSplineEvaluationData< FEMSig >::template SetChildCenterEvaluator< D >( *this , parentDepth ); } // implements EvaluationData::CenterEvaluator::set(); the argument is the parent depth
 		};
 	};
-	static void SetCenterEvaluator( typename CenterEvaluator::Evaluator& evaluator , int depth );
-	static void SetChildCenterEvaluator( typename CenterEvaluator::ChildEvaluator& evaluator , int parentDepth );
+	template< unsigned int D > static void SetCenterEvaluator( typename CenterEvaluator< D >::Evaluator& evaluator , int depth );
+	template< unsigned int D > static void SetChildCenterEvaluator( typename CenterEvaluator< D >::ChildEvaluator& evaluator , int parentDepth );
 
+	template< unsigned int D > // D+1 is the size of the first (derivative) dimension of the value tables below
 	struct CornerEvaluator
 	{
-		struct Evaluator
+		struct Evaluator : public EvaluationData::CornerEvaluator // table populated by SetCornerEvaluator< D >()
 		{
 		protected:
 			friend BSplineEvaluationData;
 			int _depth;
-			double _ccValues[2][IndexSize][BSplineSupportSizes< Degree >::CornerSize];
+			double _ccValues[D+1][IndexSize][BSplineSupportSizes< Degree >::BCornerSize]; // BCorner spans [CornerStart-1,CornerEnd+1], i.e. two more entries than the Corner range used before
 		public:
-#ifdef BRUNO_LEVY_FIX
 			Evaluator( void ){ _depth = 0 ; memset( _ccValues , 0 , sizeof( _ccValues ) ); }
-#endif // BRUNO_LEVY_FIX
-			double value( int fIdx , int cIdx , bool d ) const;
+			double value( int fIdx , int cIdx , int d ) const; // d is now an int rather than a bool derivative flag
 			int depth( void ) const { return _depth; }
+			void set( int depth ){ BSplineEvaluationData< FEMSig >::template SetCornerEvaluator< D >( *this , depth ); } // implements EvaluationData::CornerEvaluator::set()
 		};
-		struct ChildEvaluator
+		struct ChildEvaluator : public EvaluationData::CornerEvaluator // table populated by SetChildCornerEvaluator< D >()
 		{
 		protected:
 			friend BSplineEvaluationData;
 			int _parentDepth;
-			double _pcValues[2][IndexSize][BSplineSupportSizes< Degree >::ChildCornerSize];
+			double _pcValues[D+1][IndexSize][BSplineSupportSizes< Degree >::ChildBCornerSize]; // ChildBCorner spans [ChildCornerStart-1,ChildCornerEnd+1], i.e. two more entries than ChildCorner
 		public:
-#ifdef BRUNO_LEVY_FIX
 			ChildEvaluator( void ){ _parentDepth = 0 ; memset( _pcValues , 0 , sizeof( _pcValues ) ); }
-#endif // BRUNO_LEVY_FIX
-			double value( int fIdx , int cIdx , bool d ) const;
+			double value( int fIdx , int cIdx , int d ) const; // d is now an int rather than a bool derivative flag
 			int parentDepth( void ) const { return _parentDepth; }
 			int childDepth( void ) const { return _parentDepth+1; }
+			void set( int parentDepth ){ BSplineEvaluationData< FEMSig >::template SetChildCornerEvaluator< D >( *this , parentDepth ); } // implements EvaluationData::CornerEvaluator::set(); the argument is the parent depth
 		};
 	};
-	static void SetCornerEvaluator( typename CornerEvaluator::Evaluator& evaluator , int depth );
-	static void SetChildCornerEvaluator( typename CornerEvaluator::ChildEvaluator& evaluator , int parentDepth );
+	template< unsigned int D > static void SetCornerEvaluator( typename CornerEvaluator< D >::Evaluator& evaluator , int depth );
+	template< unsigned int D > static void SetChildCornerEvaluator( typename CornerEvaluator< D >::ChildEvaluator& evaluator , int parentDepth );
 
+	template< unsigned int D > // D is forwarded to both member evaluators
 	struct Evaluator
 	{
-		typename CenterEvaluator::Evaluator centerEvaluator;
-		typename CornerEvaluator::Evaluator cornerEvaluator;
-		double centerValue( int fIdx , int cIdx , bool d ) const { return centerEvaluator.value( fIdx , cIdx , d ); }
-		double cornerValue( int fIdx , int cIdx , bool d ) const { return cornerEvaluator.value( fIdx , cIdx , d ); }
+		typename CenterEvaluator< D >::Evaluator centerEvaluator; // same-depth center table
+		typename CornerEvaluator< D >::Evaluator cornerEvaluator; // same-depth corner table
+		double centerValue( int fIdx , int cIdx , int d ) const { return centerEvaluator.value( fIdx , cIdx , d ); } // thin forwarder to centerEvaluator.value()
+		double cornerValue( int fIdx , int cIdx , int d ) const { return cornerEvaluator.value( fIdx , cIdx , d ); } // thin forwarder to cornerEvaluator.value()
 	};
-	static void SetEvaluator( Evaluator& evaluator , int depth ){ SetCenterEvaluator( evaluator.centerEvaluator , depth ) , SetCornerEvaluator( evaluator.cornerEvaluator , depth ); }
-
+	template< unsigned int D > static void SetEvaluator( Evaluator< D >& evaluator , int depth ){ SetCenterEvaluator< D >( evaluator.centerEvaluator , depth ) , SetCornerEvaluator< D >( evaluator.cornerEvaluator , depth ); }
+	template< unsigned int D > // D is forwarded to both member evaluators
 	struct ChildEvaluator
 	{
-		typename CenterEvaluator::ChildEvaluator centerEvaluator;
-		typename CornerEvaluator::ChildEvaluator cornerEvaluator;
-		double centerValue( int fIdx , int cIdx , bool d ) const { return centerEvaluator.value( fIdx , cIdx , d ); }
-		double cornerValue( int fIdx , int cIdx , bool d ) const { return cornerEvaluator.value( fIdx , cIdx , d ); }
+		typename CenterEvaluator< D >::ChildEvaluator centerEvaluator; // parent-to-child center table
+		typename CornerEvaluator< D >::ChildEvaluator cornerEvaluator; // parent-to-child corner table
+		double centerValue( int fIdx , int cIdx , int d ) const { return centerEvaluator.value( fIdx , cIdx , d ); } // thin forwarder to centerEvaluator.value()
+		double cornerValue( int fIdx , int cIdx , int d ) const { return cornerEvaluator.value( fIdx , cIdx , d ); } // thin forwarder to cornerEvaluator.value()
 	};
-	static void SetChildEvaluator( ChildEvaluator& evaluator , int depth ){ SetChildCenterEvaluator( evaluator.centerEvaluator , depth ) , SetChildCornerEvaluator( evaluator.cornerEvaluator , depth ); }
+	template< unsigned int D > static void SetChildEvaluator( ChildEvaluator< D >& evaluator , int depth ){ SetChildCenterEvaluator< D >( evaluator.centerEvaluator , depth ) , SetChildCornerEvaluator< D >( evaluator.cornerEvaluator , depth ); }
 
-	struct UpSampleEvaluator
+	struct UpSampleEvaluator : public EvaluationData::UpSampleEvaluator // concrete implementation of the abstract up-sample interface
 	{
 	protected:
 		friend BSplineEvaluationData;
 		int _lowDepth;
 		double _pcValues[IndexSize][BSplineSupportSizes< Degree >::UpSampleSize];
 	public:
-#ifdef BRUNO_LEVY_FIX
 		UpSampleEvaluator( void ){ _lowDepth = 0 ; memset( _pcValues , 0 , sizeof( _pcValues ) ); }
-#endif // BRUNO_LEVY_FIX
 		double value( int pIdx , int cIdx ) const;
 		int lowDepth( void ) const { return _lowDepth; }
+		void set( int lowDepth ){ BSplineEvaluationData::SetUpSampleEvaluator( *this , lowDepth ); } // implements EvaluationData::UpSampleEvaluator::set() by forwarding to the static setter
 	};
 	static void SetUpSampleEvaluator( UpSampleEvaluator& evaluator , int lowDepth );
 };
 
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 class BSplineIntegrationData
 {
 public:
-	static const int OffsetStart = - BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSupportStart , OffsetStop = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSupportEnd + ( Degree1&1 ) , IndexSize = OffsetStart + OffsetStop + 1 + 2 * BSplineEvaluationData< Degree1 , BType1 >::Pad;
+	static const unsigned int Degree1 = FEMSignature< FEMSig1 >::Degree;
+	static const unsigned int Degree2 = FEMSignature< FEMSig2 >::Degree;
+	static const int OffsetStart = - BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSupportStart;
+	static const int OffsetStop  =   BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSupportEnd + ( Degree1&1 );
+	static const int IndexSize = OffsetStart + OffsetStop + 1 + 2 * BSplineEvaluationData< FEMSig1 >::Pad;
 	static int OffsetToIndex( int depth , int offset )
 	{
 		int dim = BSplineSupportSizes< Degree1 >::Nodes( depth );
-		if     ( offset<OffsetStart )     return BSplineEvaluationData< Degree1 , BType1 >::Pad + offset;
-		else if( offset>=dim-OffsetStop ) return BSplineEvaluationData< Degree1 , BType1 >::Pad + OffsetStart + 1 + offset - ( dim-OffsetStop );
-		else                              return BSplineEvaluationData< Degree1 , BType1 >::Pad + OffsetStart;
+		if     ( offset<OffsetStart )     return BSplineEvaluationData< FEMSig1 >::Pad + offset;
+		else if( offset>=dim-OffsetStop ) return BSplineEvaluationData< FEMSig1 >::Pad + OffsetStart + 1 + offset - ( dim-OffsetStop );
+		else                              return BSplineEvaluationData< FEMSig1 >::Pad + OffsetStart;
 	}
-	static inline int IndexToOffset( int depth , int idx ){ return ( idx-BSplineEvaluationData< Degree1 , BType1 >::Pad<=OffsetStart ? idx-BSplineEvaluationData< Degree1 , BType1 >::Pad : ( BSplineSupportSizes< Degree1 >::Nodes(depth) + BSplineEvaluationData< Degree1 , BType1 >::Pad - IndexSize + idx ) ); }
+	static inline int IndexToOffset( int depth , int idx ){ return ( idx-BSplineEvaluationData< FEMSig1 >::Pad<=OffsetStart ? idx-BSplineEvaluationData< FEMSig1 >::Pad : ( BSplineSupportSizes< Degree1 >::Nodes(depth) + BSplineEvaluationData< FEMSig1 >::Pad - IndexSize + idx ) ); }
 
 	template< unsigned int D1 , unsigned int D2 > static double Dot( int depth1 , int off1 , int depth2 , int off2 );
 	// An index is interiorly overlapped if the support of its overlapping neighbors is in the range [0,1<<depth)
@@ -361,7 +410,7 @@ class BSplineIntegrationData
 
 	struct FunctionIntegrator
 	{
-		template< unsigned int D1 , unsigned int D2 >
+		template< unsigned int D1=Degree1 , unsigned int D2=Degree2 >
 		struct Integrator
 		{
 		protected:
@@ -369,13 +418,16 @@ class BSplineIntegrationData
 			int _depth;
 			double _ccIntegrals[D1+1][D2+1][IndexSize][BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSize];
 		public:
-#ifdef BRUNO_LEVY_FIX
-			Integrator( void ){ _depth = 0 ; memset(_ccIntegrals, 0, sizeof(_ccIntegrals)); }
-#endif // BRUNO_LEVY_FIX
+			Integrator( void )
+			{
+				_depth = 0;
+				memset(_ccIntegrals, 0, sizeof(_ccIntegrals));
+			}
 			double dot( int fIdx1 , int fidx2 , int d1 , int d2 ) const;
 			int depth( void ) const { return _depth; }
+			void set( int depth ){ BSplineIntegrationData::SetIntegrator( *this , depth ); }
 		};
-		template< unsigned int D1 , unsigned int D2 >
+		template< unsigned int D1=Degree1 , unsigned int D2=Degree2 >
 		struct ChildIntegrator
 		{
 		protected:
@@ -383,12 +435,15 @@ class BSplineIntegrationData
 			int _parentDepth;
 			double _pcIntegrals[D1+1][D2+1][IndexSize][BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapSize];
 		public:
-#ifdef BRUNO_LEVY_FIX
-			ChildIntegrator( void ){ _parentDepth = 0 ; memset( _pcIntegrals , 0 , sizeof( _pcIntegrals ) ); }
-#endif // BRUNO_LEVY_FIX
+			ChildIntegrator( void )
+			{
+				_parentDepth = 0;
+				memset( _pcIntegrals , 0 , sizeof( _pcIntegrals ) ); 
+			}
 			double dot( int fIdx1 , int fidx2 , int d1 , int d2 ) const;
 			int parentDepth( void ) const { return _parentDepth; }
 			int childDepth( void ) const { return _parentDepth+1; }
+			void set( int depth ){ BSplineIntegrationData::SetChildIntegrator( *this , depth ); }
 		};
 	};
 	// D1 and D2 indicate the number of derivatives that should be taken
@@ -441,30 +496,74 @@ class BSplineIntegrationData
 #undef SMALLEST_INTEGER_LARGER_THAN_OR_EQUAL_TO_HALF
 #undef LARGEST_INTEGER_SMALLER_THAN_OR_EQUAL_TO_HALF
 
-template< int Degree , BoundaryType BType >
+
+template< unsigned int FEMSig , unsigned int D=0 > // D+1 polynomials are stored per piece (see _polys); D defaults to 0
 struct BSplineData
 {
-	inline static int TotalFunctionCount( int depth ){ return depth<0 ? 0 : (1<<(depth+1)) - 1 + (depth+1) * ( (Degree&1) + 2 * BSplineEvaluationData< Degree , BType >::Pad ); }
-	inline static int FunctionIndex( int depth , int offset ){ return TotalFunctionCount( depth-1 ) + offset + BSplineEvaluationData< Degree , BType >::Pad; }
-	inline static void FactorFunctionIndex( int idx , int& depth , int& offset )
+	static const unsigned int Degree = FEMSignature< FEMSig >::Degree; // degree taken from the FEM signature
+	static const int _Degree = Degree; // signed copy of Degree, used in the signed index arithmetic below
+	// Note that this struct stores the components in left-to-right order
+	struct BSplineComponents
 	{
-		int dim;
-		depth = 0;
-		while( idx>=( dim = BSplineEvaluationData< Degree , BType >::End( depth ) - BSplineEvaluationData< Degree , BType >::Begin( depth ) ) ) idx -= dim , depth++;
-		offset = idx - BSplineEvaluationData< Degree , BType >::Pad;
-	}
-	inline static void FunctionSpan( int depth , int& fStart , int& fEnd ){ fStart = TotalFunctionCount( depth-1 ) , fEnd = TotalFunctionCount( depth ); }
-	inline static int RemapOffset( int depth , int idx , bool& reflect );
+		BSplineComponents( void ){ ; }
+		BSplineComponents( int depth , int offset );
+		const Polynomial< Degree >* operator[] ( int idx ) const { return _polys[idx]; } // returns the row of D+1 polynomials for piece idx; idx is not range-checked
+	protected:
+		Polynomial< Degree > _polys[Degree+1][D+1]; // [piece][d]: Degree+1 pieces, D+1 polynomials per piece
+	};
+	struct SparseBSplineEvaluator
+	{
+		void init( unsigned int depth )
+		{
+			_depth = depth , _width = 1./(1<<depth); // _width = 2^-depth
+			// _preStart + BSplineSupportSizes< _Degree >::SupportEnd >=0
+			_preStart = -BSplineSupportSizes< _Degree >::SupportEnd;
+			// _postStart + BSplineSupportSizes< _Degree >::SupportEnd <= (1<<depth)-1
+			_postStart = (1<<depth) - 1 - BSplineSupportSizes< _Degree >::SupportEnd;
+			_preEnd = _preStart + _Degree + 1; // [_preStart,_preEnd) holds Degree+1 explicitly stored functions
+			_postEnd = _postStart + _Degree + 1; // [_postStart,_postEnd) holds Degree+1 explicitly stored functions
+			_centerIndex = ( ( _preStart + _Degree + 1 ) + ( _postStart - 1 ) ) / 2; // integer midpoint of _preEnd and _postStart-1
+			_centerComponents = BSplineComponents( depth , _centerIndex ); // single component set reused for every fIdx in [_preEnd,_postStart)
+			for( int i=0 ; i<=Degree ; i++ ) _preComponents[i] = BSplineComponents( depth , _preStart+i ) , _postComponents[i] = BSplineComponents( depth , _postStart+i ); // NOTE(review): int i is compared against unsigned Degree; _Degree would avoid the mixed-sign comparison
+		}
+		double value( double p ,            int fIdx , int d ) const { return value( p , (int)( p * (1<<_depth ) ) , fIdx , d ); } // derives pIdx by truncating p*2^_depth; NOTE(review): p==1 yields pIdx==1<<_depth - confirm callers avoid it
+		double value( double p , int pIdx , int fIdx , int d ) const
+		{
+			if     ( fIdx<_preStart  ) return 0;
+			else if( fIdx<_preEnd    ) return _preComponents [fIdx-_preStart ][pIdx-fIdx+_LeftSupportRadius][d]( p );
+			else if( fIdx<_postStart ) return _centerComponents               [pIdx-fIdx+_LeftSupportRadius][d]( p+_width*(_centerIndex-fIdx) ); // evaluates the center function at p shifted by (_centerIndex-fIdx) cell widths
+			else if( fIdx<_postEnd   ) return _postComponents[fIdx-_postStart][pIdx-fIdx+_LeftSupportRadius][d]( p );
+			else                       return 0;
+		}
+		const Polynomial< _Degree >* polynomialsAndOffset( double& p ,            int fIdx ) const { return polynomialsAndOffset( p , (int)( p * (1<<_depth ) ) , fIdx ); } // derives pIdx by truncating p*2^_depth
+		const Polynomial< _Degree >* polynomialsAndOffset( double& p , int pIdx , int fIdx ) const // NOTE(review): unlike value(), fIdx is not checked against _preStart/_postEnd here
+		{
+			if     ( fIdx<_preEnd    ){                                   return _preComponents [fIdx-_preStart ][pIdx-fIdx+_LeftSupportRadius]; }
+			else if( fIdx<_postStart ){ p += _width*(_centerIndex-fIdx) ; return _centerComponents               [pIdx-fIdx+_LeftSupportRadius]; } // p is modified in place with the same shift value() applies
+			else                      {                                   return _postComponents[fIdx-_postStart][pIdx-fIdx+_LeftSupportRadius]; }
+		}
+	protected:
+		static const int _LeftSupportRadius = -BSplineSupportSizes< _Degree >::SupportStart; // added to pIdx-fIdx to form the piece index
+		BSplineComponents _preComponents[_Degree+1] , _postComponents[_Degree+1] ,_centerComponents;
+		int _preStart , _preEnd , _postStart , _postEnd , _centerIndex;
+		unsigned int _depth;
+		double _width;
+	};
+	const SparseBSplineEvaluator& operator[]( int depth ) const { return _evaluators[depth]; } // per-depth evaluator; depth is not checked against _maxDepth
 
-	size_t functionCount;
-	Pointer( typename BSplineEvaluationData< Degree , BType >::BSplineComponents )  baseBSplines;
-	Pointer( typename BSplineEvaluationData< Degree , BType >::BSplineComponents ) dBaseBSplines;
+	inline static int RemapOffset( int depth , int idx , bool& reflect );
 
+	BSplineData( void );
+	void reset( int maxDepth );
 	BSplineData( int maxDepth );
 	~BSplineData( void );
+
+protected:
+	unsigned int _maxDepth;
+	Pointer( SparseBSplineEvaluator ) _evaluators; // indexed by depth through operator[]
 };
 
-template< int Degree1 , int Degree2 > void SetBSplineElementIntegrals( double integrals[Degree1+1][Degree2+1] );
+template< unsigned int Degree1 , unsigned int Degree2 > void SetBSplineElementIntegrals( double integrals[Degree1+1][Degree2+1] );
 
 
 #include "BSplineData.inl"
diff --git a/Src/BSplineData.inl b/Src/BSplineData.inl
index 225fed8..5954e73 100644
--- a/Src/BSplineData.inl
+++ b/Src/BSplineData.inl
@@ -29,25 +29,50 @@ DAMAGE.
 ///////////////////////////
 // BSplineEvaluationData //
 ///////////////////////////
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::Value( int depth , int off , double s , bool derivative )
+template< unsigned int FEMSig >
+double BSplineEvaluationData< FEMSig >::Value( int depth , int off , double s , int d ) // evaluates entry [d] of the function at (depth,off) at s; d replaces the former bool derivative flag
 {
 	if( s<0 || s>1 ) return 0.;
 
 	int res = 1<<depth;
 	if( OutOfBounds( depth , off ) ) return 0;
 
-	BSplineComponents components = BSplineComponents( depth , off );
+	typename BSplineData< FEMSig , Degree >::BSplineComponents components( depth , off ); // instantiated with D=Degree, so entries [0..Degree] exist per piece
 
 	// [NOTE] This is an ugly way to ensure that when s=1 we evaluate using a B-Spline component within the valid range.
 	int ii = std::max< int >( 0 , std::min< int >( res-1 , (int)floor( s * res ) ) ) - off;
 
 	if( ii<BSplineSupportSizes< Degree >::SupportStart || ii>BSplineSupportSizes< Degree >::SupportEnd ) return 0;
-	if( derivative ) return components[ ii-BSplineSupportSizes< Degree >::SupportStart ].derivative()(s);
-	else             return components[ ii-BSplineSupportSizes< Degree >::SupportStart ](s);
+	return d<=Degree ? components[ii-BSplineSupportSizes< Degree >::SupportStart][d](s) : 0; // d>Degree returns 0; a negative d is converted to unsigned by the comparison and also returns 0
 }
-template< int Degree , BoundaryType BType >
-void BSplineEvaluationData< Degree , BType >::SetCenterEvaluator( typename CenterEvaluator::Evaluator& evaluator , int depth )
+template< unsigned int FEMSig >
+double BSplineEvaluationData< FEMSig >::Integral( int depth , int off , double b , double e , int d ) // integrates entry [d] of the function at (depth,off) over [b,e] clipped to [0,1]
+{
+	double integral = 0;
+	// Check for valid integration bounds
+	if( OutOfBounds( depth , off ) ) return 0;
+	if( b>=e || b>=1 || e<=0 ) return 0; // empty interval, or interval entirely outside [0,1]
+	if( b<0 ) b=0;
+	if( e>1 ) e=1;
+
+	int res = 1<<depth;
+	double _b = ( (double)( off     + BSplineSupportSizes< Degree >::SupportStart ) )/res; // left end of the first support cell
+	double _e = ( (double)( off + 1 + BSplineSupportSizes< Degree >::SupportEnd   ) )/res; // right end of the last support cell
+	if( b>=_e || e<=_b ) return 0; // [b,e] does not overlap the support
+	typename BSplineData< FEMSig , Degree >::BSplineComponents components( depth , off ); // instantiated with D=Degree, so entries [0..Degree] exist per piece
+	for( int i=BSplineSupportSizes< Degree >::SupportStart ; i<=BSplineSupportSizes< Degree >::SupportEnd ; i++ ) // one polynomial piece per support cell
+	{
+		// The index of the current cell
+		int c = off + i;
+		// The bounds of the current cell
+		_b = std::max< double >( b , ( (double)c ) / res ) , _e = std::min< double >( e , ( (double)(c+1) )/res ); // _b/_e are reused here as the cell bounds clipped to [b,e]
+		if( _b<_e ) integral += d<=Degree ? components[i-BSplineSupportSizes< Degree >::SupportStart][d].integral( _b , _e ) : 0; // d>Degree (or negative d, via unsigned conversion) contributes 0
+	}
+	return integral;
+}
+template< unsigned int FEMSig >
+template< unsigned int D >
+void BSplineEvaluationData< FEMSig >::SetCenterEvaluator( typename CenterEvaluator< D >::Evaluator& evaluator , int depth )
 {
 	evaluator._depth = depth;
 	int res = 1<<depth;
@@ -55,11 +80,12 @@ void BSplineEvaluationData< Degree , BType >::SetCenterEvaluator( typename Cente
 	{
 		int ii = IndexToOffset( depth , i );
 		double s = 0.5 + ii + j;
-		for( int d1=0 ; d1<2 ; d1++ ) evaluator._ccValues[d1][i][j-BSplineSupportSizes< Degree >::SupportStart] = Value( depth , ii , s/res , d1!=0 );
+		for( int d1=0 ; d1<=D ; d1++ ) evaluator._ccValues[d1][i][j-BSplineSupportSizes< Degree >::SupportStart] = Value( depth , ii , s/res , d1 );
 	}
 }
-template< int Degree , BoundaryType BType >
-void BSplineEvaluationData< Degree , BType >::SetChildCenterEvaluator( typename CenterEvaluator::ChildEvaluator& evaluator , int parentDepth )
+template< unsigned int FEMSig >
+template< unsigned int D >
+void BSplineEvaluationData< FEMSig >::SetChildCenterEvaluator( typename CenterEvaluator< D >::ChildEvaluator& evaluator , int parentDepth )
 {
 	evaluator._parentDepth = parentDepth;
 	int res = 1<<(parentDepth+1);
@@ -67,49 +93,89 @@ void BSplineEvaluationData< Degree , BType >::SetChildCenterEvaluator( typename
 	{
 		int ii = IndexToOffset( parentDepth , i );
 		double s = 0.5 + 2*ii + j;
-		for( int d1=0 ; d1<2 ; d1++ ) evaluator._pcValues[d1][i][j-BSplineSupportSizes< Degree >::ChildSupportStart] = Value( parentDepth , ii , s/res , d1!=0 );
+		for( int d1=0 ; d1<=D ; d1++ ) evaluator._pcValues[d1][i][j-BSplineSupportSizes< Degree >::ChildSupportStart] = Value( parentDepth , ii , s/res , d1 );
 	}
 }
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::CenterEvaluator::Evaluator::value( int fIdx , int cIdx , bool d ) const
+template< unsigned int FEMSig >
+template< unsigned int D >
+double BSplineEvaluationData< FEMSig >::CenterEvaluator< D >::Evaluator::value( int fIdx , int cIdx , int d ) const // Table lookup: returns the cached entry for function offset fIdx and cell index cIdx at depth _depth, or 0 when the guard below rejects the pair
 {
 	int dd = cIdx-fIdx , res = 1<<(_depth);
 	if( cIdx<0 || cIdx>=res || OutOfBounds( _depth , fIdx ) || dd<BSplineSupportSizes< Degree >::SupportStart || dd>BSplineSupportSizes< Degree >::SupportEnd ) return 0;
-	return _ccValues[d?1:0][ OffsetToIndex( _depth , fIdx ) ][dd-BSplineSupportSizes< Degree >::SupportStart];
+	return _ccValues[d][ OffsetToIndex( _depth , fIdx ) ][dd-BSplineSupportSizes< Degree >::SupportStart]; // d indexes the first table dimension as-is and is not range-checked here; SetCenterEvaluator fills entries for d in [0,D]
 }
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::CenterEvaluator::ChildEvaluator::value( int fIdx , int cIdx , bool d ) const
+template< unsigned int FEMSig >
+template< unsigned int D >
+double BSplineEvaluationData< FEMSig >::CenterEvaluator< D >::ChildEvaluator::value( int fIdx , int cIdx , int d ) const // Table lookup: fIdx is a function offset at depth _parentDepth, cIdx a cell index at depth _parentDepth+1; returns 0 when the guard below rejects the pair
 {
 	int dd = cIdx-2*fIdx , res = 1<<(_parentDepth+1);
 	if( cIdx<0 || cIdx>=res || OutOfBounds( _parentDepth , fIdx ) || dd<BSplineSupportSizes< Degree >::ChildSupportStart || dd>BSplineSupportSizes< Degree >::ChildSupportEnd ) return 0;
-	return _pcValues[d?1:0][ OffsetToIndex( _parentDepth , fIdx ) ][dd-BSplineSupportSizes< Degree >::ChildSupportStart];
+	return _pcValues[d][ OffsetToIndex( _parentDepth , fIdx ) ][dd-BSplineSupportSizes< Degree >::ChildSupportStart]; // d indexes the first table dimension as-is and is not range-checked here; SetChildCenterEvaluator fills entries for d in [0,D]
 }
-template< int Degree , BoundaryType BType >
-void BSplineEvaluationData< Degree , BType >::SetCornerEvaluator( typename CornerEvaluator::Evaluator& evaluator , int depth )
+template< unsigned int FEMSig >
+template< unsigned int D >
+void BSplineEvaluationData< FEMSig >::SetCornerEvaluator( typename CornerEvaluator< D >::Evaluator& evaluator , int depth ) // Stores depth in the evaluator and fills evaluator._ccValues[d1][i][jj] for every d1 in [0,D] from Value() sampled at (or around) s/res
 {
 	evaluator._depth = depth;
 	int res = 1<<depth;
-	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineSupportSizes< Degree >::CornerStart ; j<=BSplineSupportSizes< Degree >::CornerEnd ; j++ )
+	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineSupportSizes< Degree >::BCornerStart ; j<=BSplineSupportSizes< Degree >::BCornerEnd ; j++ ) // i: table row, mapped to a function offset by IndexToOffset; j: offset relative to that function, from BCornerStart to BCornerEnd
 	{
 		int ii = IndexToOffset( depth , i );
 		double s = ii + j;
-		for( int d1=0 ; d1<2 ; d1++ ) evaluator._ccValues[d1][i][j-BSplineSupportSizes< Degree >::CornerStart] = Value( depth , ii , s/res , d1!=0 );
+		int jj = j-BSplineSupportSizes< Degree >::BCornerStart; // zero-based column index into the table
+		for( int d1=0 ; d1<=D ; d1++ )
+		{
+			if( d1==Degree ) // Special case d1==Degree: instead of sampling at s, samples taken half a cell to either side (s-0.5 and s+0.5) are combined and halved
+			{ // At the first/last j only one of the two samples is used (s+0.5 at BCornerStart, s-0.5 at BCornerEnd), still divided by 2 -- presumably the other sample lies outside the support; TODO confirm
+				if     ( j==BSplineSupportSizes< Degree >::BCornerStart ) evaluator._ccValues[d1][i][jj] = (                                            Value( depth , ii , ( s+0.5 )/res , d1 ) ) / 2;
+				else if( j==BSplineSupportSizes< Degree >::BCornerEnd   ) evaluator._ccValues[d1][i][jj] = ( Value( depth , ii , ( s-0.5 )/res , d1 )                                            ) / 2;
+				else                                                      evaluator._ccValues[d1][i][jj] = ( Value( depth , ii , ( s-0.5 )/res , d1 ) + Value( depth , ii , ( s+0.5 )/res , d1 ) ) / 2;
+			}
+			else evaluator._ccValues[d1][i][jj] = Value( depth , ii , s /res , d1 ); // all other d1: sample directly at s/res
+		}
 	}
 }
-template< int Degree , BoundaryType BType >
-void BSplineEvaluationData< Degree , BType >::SetChildCornerEvaluator( typename CornerEvaluator::ChildEvaluator& evaluator , int parentDepth )
+template< unsigned int FEMSig >
+template< unsigned int D  >
+void BSplineEvaluationData< FEMSig >::SetChildCornerEvaluator( typename CornerEvaluator< D >::ChildEvaluator& evaluator , int parentDepth ) // Stores parentDepth in the evaluator and fills evaluator._pcValues[d1][i][jj] for every d1 in [0,D]; sample positions use the resolution of depth parentDepth+1
 {
 	evaluator._parentDepth = parentDepth;
 	int res = 1<<(parentDepth+1);
-	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineSupportSizes< Degree >::ChildCornerStart ; j<=BSplineSupportSizes< Degree >::ChildCornerEnd ; j++ )
+	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineSupportSizes< Degree >::ChildBCornerStart ; j<=BSplineSupportSizes< Degree >::ChildBCornerEnd ; j++ ) // i: table row, mapped to a parent-depth function offset by IndexToOffset; j: offset relative to 2*ii, from ChildBCornerStart to ChildBCornerEnd
 	{
 		int ii = IndexToOffset( parentDepth , i );
 		double s = 2*ii + j;
-		for( int d1=0 ; d1<2 ; d1++ ) evaluator._pcValues[d1][i][j-BSplineSupportSizes< Degree >::ChildCornerStart] = Value( parentDepth , ii , s/res , d1!=0 );
+		int jj = j-BSplineSupportSizes< Degree >::ChildBCornerStart; // zero-based column index into the table
+		for( int d1=0 ; d1<=D ; d1++ )
+		{
+			if( d1==Degree ) // Special case d1==Degree: instead of sampling at s, samples taken at s-0.5 and s+0.5 are combined and halved
+			{ // At the first/last j only one of the two samples is used (s+0.5 at ChildBCornerStart, s-0.5 at ChildBCornerEnd), still divided by 2 -- presumably the other sample lies outside the support; TODO confirm
+				if     ( j==BSplineSupportSizes< Degree >::ChildBCornerStart ) evaluator._pcValues[d1][i][jj] = (                                                  Value( parentDepth , ii , ( s+0.5 )/res , d1 ) ) / 2;
+				else if( j==BSplineSupportSizes< Degree >::ChildBCornerEnd   ) evaluator._pcValues[d1][i][jj] = ( Value( parentDepth , ii , ( s-0.5 )/res , d1 )                                                  ) / 2;
+				else                                                           evaluator._pcValues[d1][i][jj] = ( Value( parentDepth , ii , ( s-0.5 )/res , d1 ) + Value( parentDepth , ii , ( s+0.5 )/res , d1 ) ) / 2;
+			}
+			else evaluator._pcValues[d1][i][jj] = Value( parentDepth , ii , s /res , d1 ); // all other d1: sample directly at s/res
+		}
 	}
 }
-template< int Degree , BoundaryType BType >
-void BSplineEvaluationData< Degree , BType >::SetUpSampleEvaluator( UpSampleEvaluator& evaluator , int lowDepth )
+template< unsigned int FEMSig >
+template< unsigned int D >
+double BSplineEvaluationData< FEMSig >::CornerEvaluator< D >::Evaluator::value( int fIdx , int cIdx , int d ) const // Table lookup of the cached corner entry for function offset fIdx and corner index cIdx at depth _depth
+{
+	int dd = cIdx-fIdx , res = ( 1<<_depth ) + 1; // res bounds cIdx: valid corner indices are 0 .. 2^_depth inclusive
+	if( cIdx<0 || cIdx>=res || OutOfBounds( _depth , fIdx ) || dd<BSplineSupportSizes< Degree >::BCornerStart || dd>BSplineSupportSizes< Degree >::BCornerEnd ) return 0; // outside the index range or outside [BCornerStart,BCornerEnd]: report 0
+	return _ccValues[d][ OffsetToIndex( _depth , fIdx ) ][dd-BSplineSupportSizes< Degree >::BCornerStart]; // d is used as-is and is not range-checked here; SetCornerEvaluator fills entries for d in [0,D]
+}
+template< unsigned int FEMSig >
+template< unsigned int D >
+double BSplineEvaluationData< FEMSig >::CornerEvaluator< D >::ChildEvaluator::value( int fIdx , int cIdx , int d ) const // Table lookup: fIdx is a function offset at depth _parentDepth, cIdx a corner index at depth _parentDepth+1
+{
+	int dd = cIdx-2*fIdx , res = ( 1<<(_parentDepth+1) ) + 1; // res bounds cIdx: valid corner indices are 0 .. 2^(_parentDepth+1) inclusive
+	if( cIdx<0 || cIdx>=res || OutOfBounds( _parentDepth , fIdx ) || dd<BSplineSupportSizes< Degree >::ChildBCornerStart || dd>BSplineSupportSizes< Degree >::ChildBCornerEnd ) return 0; // outside the index range or outside [ChildBCornerStart,ChildBCornerEnd]: report 0
+	return _pcValues[d][ OffsetToIndex( _parentDepth , fIdx ) ][dd-BSplineSupportSizes< Degree >::ChildBCornerStart]; // d is used as-is and is not range-checked here; SetChildCornerEvaluator fills entries for d in [0,D]
+}
+template< unsigned int FEMSig >
+void BSplineEvaluationData< FEMSig >::SetUpSampleEvaluator( UpSampleEvaluator& evaluator , int lowDepth )
 {
 	evaluator._lowDepth = lowDepth;
 	for( int i=0 ; i<IndexSize ; i++ )
@@ -119,75 +185,26 @@ void BSplineEvaluationData< Degree , BType >::SetUpSampleEvaluator( UpSampleEval
 		for( int j=0 ; j<BSplineSupportSizes< Degree >::UpSampleSize ; j++ ) evaluator._pcValues[i][j] = b[j];
 	}
 }
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::CornerEvaluator::Evaluator::value( int fIdx , int cIdx , bool d ) const
-{
-	int dd = cIdx-fIdx , res = ( 1<<_depth ) + 1;
-	if( cIdx<0 || cIdx>=res || OutOfBounds( _depth , fIdx ) || dd<BSplineSupportSizes< Degree >::CornerStart || dd>BSplineSupportSizes< Degree >::CornerEnd ) return 0;
-	return _ccValues[d?1:0][ OffsetToIndex( _depth , fIdx ) ][dd-BSplineSupportSizes< Degree >::CornerStart];
-}
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::CornerEvaluator::ChildEvaluator::value( int fIdx , int cIdx , bool d ) const
-{
-	int dd = cIdx-2*fIdx , res = ( 1<<(_parentDepth+1) ) + 1;
-	if( cIdx<0 || cIdx>=res || OutOfBounds( _parentDepth , fIdx ) || dd<BSplineSupportSizes< Degree >::ChildCornerStart || dd>BSplineSupportSizes< Degree >::ChildCornerEnd ) return 0;
-	return _pcValues[d?1:0][ OffsetToIndex( _parentDepth , fIdx ) ][dd-BSplineSupportSizes< Degree >::ChildCornerStart];
-}
-template< int Degree , BoundaryType BType >
-double BSplineEvaluationData< Degree , BType >::UpSampleEvaluator::value( int pIdx , int cIdx ) const
+template< unsigned int FEMSig >
+double BSplineEvaluationData< FEMSig >::UpSampleEvaluator::value( int pIdx , int cIdx ) const // Returns the cached _pcValues entry relating offset pIdx at depth _lowDepth to offset cIdx at depth _lowDepth+1; 0 if either offset is out of bounds or cIdx-2*pIdx is outside [UpSampleStart,UpSampleEnd]
 {
 	int dd = cIdx-2*pIdx;
 	if( OutOfBounds( _lowDepth+1 , cIdx ) || OutOfBounds( _lowDepth , pIdx ) || dd<BSplineSupportSizes< Degree >::UpSampleStart || dd>BSplineSupportSizes< Degree >::UpSampleEnd ) return 0;
 	return _pcValues[ OffsetToIndex( _lowDepth , pIdx ) ][dd-BSplineSupportSizes< Degree >::UpSampleStart];
 }
 
-//////////////////////////////////////////////
-// BSplineEvaluationData::BSplineComponents //
-//////////////////////////////////////////////
-template< int Degree , BoundaryType BType >
-BSplineEvaluationData< Degree , BType >::BSplineComponents::BSplineComponents( int depth , int offset )
-{
-	int res = 1<<depth;
-	BSplineElements< Degree > elements( res , offset , BType );
-
-	// The first index is the position, the second is the element type
-	Polynomial< Degree > components[Degree+1][Degree+1];
-	// Generate the elements that can appear in the base function corresponding to the base function at (depth,offset) = (0,0)
-	for( int d=0 ; d<=Degree ; d++ ) for( int dd=0 ; dd<=Degree ; dd++ ) components[d][dd] = Polynomial< Degree >::BSplineComponent( Degree-dd ).shift( -( (Degree+1)/2 ) + d );
-
-	// Now adjust to the desired depth and offset
-	double width = 1. / res;
-	for( int d=0 ; d<=Degree ; d++ ) for( int dd=0 ; dd<=Degree ; dd++ ) components[d][dd] = components[d][dd].scale( width ).shift( width*offset );
-
-	// Now write in the polynomials
-	for( int d=0 ; d<=Degree ; d++ )
-	{
-		int idx = offset + BSplineSupportSizes< Degree >::SupportStart + d;
-		_polys[d] = Polynomial< Degree >();
-
-		if( idx>=0 && idx<res ) for( int dd=0 ; dd<=Degree ; dd++ ) _polys[d] += components[d][dd] * ( ( double )( elements[idx][dd] ) ) / elements.denominator;
-	}
-}
-
-template< int Degree , BoundaryType BType >
-typename BSplineEvaluationData< Degree , BType >::BSplineComponents BSplineEvaluationData< Degree , BType >::BSplineComponents::derivative( void ) const
-{
-	BSplineComponents b = (*this);
-	for( int d=0 ; d<=Degree ; d++ ) b._polys[d] = b._polys[d].derivative();
-	return b;
-}
-
 //////////////////////////////////////////////////////////
 // BSplineEvaluationData::BSplineUpSamplingCoefficients //
 //////////////////////////////////////////////////////////
-template< int Degree , BoundaryType BType >
-BSplineEvaluationData< Degree , BType >::BSplineUpSamplingCoefficients::BSplineUpSamplingCoefficients( int depth , int offset )
+template< unsigned int FEMSig >
+BSplineEvaluationData< FEMSig >::BSplineUpSamplingCoefficients::BSplineUpSamplingCoefficients( int depth , int offset )
 {
+	static const BoundaryType BType = FEMSignature< FEMSig >::BType;
 	// [ 1/8 1/2 3/4 1/2 1/8]
 	// [ 1 , 1 ] ->  [ 3/4 , 1/2 , 1/8 ] + [ 1/8 , 1/2 , 3/4 ] = [ 7/8 , 1 , 7/8 ]
 	int dim = BSplineSupportSizes< Degree >::Nodes(depth) , _dim = BSplineSupportSizes< Degree >::Nodes(depth+1);
 	bool reflect;
-	offset = BSplineData< Degree , BType >::RemapOffset( depth , offset , reflect );
+	offset = BSplineData< FEMSig >::RemapOffset( depth , offset , reflect );
 	int multiplier = ( BType==BOUNDARY_DIRICHLET && reflect ) ? -1 : 1;
 	bool useReflected = ( BType!=BOUNDARY_FREE ) && ( BSplineSupportSizes< Degree >::Inset || ( offset % ( dim-1 ) ) );
 	int b[ BSplineSupportSizes< Degree >::UpSampleSize ];
@@ -201,7 +218,7 @@ BSplineEvaluationData< Degree , BType >::BSplineUpSamplingCoefficients::BSplineU
 	for( int i=BSplineSupportSizes< Degree >::UpSampleStart ; i<=BSplineSupportSizes< Degree >::UpSampleEnd ; i++ )
 	{
 		int _offset = 2*offset+i;
-		_offset = BSplineData< Degree , BType >::RemapOffset( depth+1 , _offset , reflect );
+		_offset = BSplineData< FEMSig >::RemapOffset( depth+1 , _offset , reflect );
 		if( useReflected || !reflect )
 		{
 			int _multiplier = multiplier * ( ( BType==BOUNDARY_DIRICHLET && reflect ) ? -1 : 1 );
@@ -210,7 +227,7 @@ BSplineEvaluationData< Degree , BType >::BSplineUpSamplingCoefficients::BSplineU
 		// If we are not inset and we are at the boundary, use the reflection as well
 		if( BType!=BOUNDARY_FREE && !BSplineSupportSizes< Degree >::Inset && ( offset % (dim-1) ) && !( _offset % (_dim-1) ) )
 		{
-			_offset = BSplineData< Degree , BType >::RemapOffset( depth+1 , _offset , reflect );
+			_offset = BSplineData< FEMSig >::RemapOffset( depth+1 , _offset , reflect );
 			int _multiplier = multiplier * ( ( BType==BOUNDARY_DIRICHLET && reflect ) ? -1 : 1 );
 			if( BType==BOUNDARY_DIRICHLET ) _multiplier *= -1;
 			coefficients[ _offset ] += b[ i-BSplineSupportSizes< Degree >::UpSampleStart ] * _multiplier;
@@ -221,27 +238,60 @@ BSplineEvaluationData< Degree , BType >::BSplineUpSamplingCoefficients::BSplineU
 ////////////////////////////
 // BSplineIntegrationData //
 ////////////////////////////
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 >
-double BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::Dot( int depth1 ,  int off1 , int depth2 , int off2 )
+double BSplineIntegrationData< FEMSig1 , FEMSig2 >::Dot( int depth1 ,  int off1 , int depth2 , int off2 )
 {
-	if( D1>Degree1 ) fprintf( stderr , "[ERROR] BSplineIntegrationData::Dot: taking more derivatives than the degree: %d > %d\n" , D1 , Degree1 ) , exit( 0 );
-	if( D2>Degree2 ) fprintf( stderr , "[ERROR] BSplineIntegrationData::Dot: taking more derivatives than the degree: %d > %d\n" , D2 , Degree2 ) , exit( 0 );
+	if( D1>Degree1 ) ERROR_OUT( "Taking more derivatives than the degree: %d > %d" , D1 , Degree1 );
+	if( D2>Degree2 ) ERROR_OUT( "Taking more derivatives than the degree: %d > %d" , D2 , Degree2 );
 	const int _Degree1 = ( Degree1>=D1 ) ? Degree1 - D1 : 0 , _Degree2 = ( Degree2>=D2 ) ? Degree2 - D2 : 0;
 	int sums[ Degree1+1 ][ Degree2+1 ];
 
 	int depth = std::max< int >( depth1 , depth2 );
 
-	BSplineElements< Degree1 > b1( 1<<depth1 , off1 , BType1 );
-	BSplineElements< Degree2 > b2( 1<<depth2 , off2 , BType2 );
-
+	BSplineElements< Degree1 > b1;
+	BSplineElements< Degree2 > b2;
+	if( BSplineSupportSizes< Degree1 >::IsInteriorlySupported( depth1 , off1 ) && BSplineSupportSizes< Degree2 >::IsInteriorlySupported( depth2 , off2 ) )
 	{
-		BSplineElements< Degree1 > b;
-		while( depth1<depth ) b=b1 , b.upSample( b1 ) , depth1++;
+		if( depth1<depth2 )
+		{
+			int begin1 , end1 , res = 1 - BSplineSupportSizes< Degree1 >::SupportStart + BSplineSupportSizes< Degree1 >::SupportEnd;
+			BSplineSupportSizes< Degree1 >::InteriorSupportedSpan( depth1 , begin1 , end1 );
+			b1 = BSplineElements< Degree1 >( res , begin1 , BOUNDARY_FREE );
+			for( int d=depth1 ; d<depth2 ; d++ )
+			{
+				BSplineElements< Degree1 > b=b1;
+				b.upSample( b1 );
+				res <<= 1;
+			}
+			b2 = BSplineElements< Degree2 >( res , off2 - ( (off1-begin1)<<(depth2-depth1) ) , BOUNDARY_FREE );
+		}
+		else
+		{
+			int begin2 , end2 , res = 1 - BSplineSupportSizes< Degree2 >::SupportStart + BSplineSupportSizes< Degree2 >::SupportEnd;
+			BSplineSupportSizes< Degree2 >::InteriorSupportedSpan( depth2 , begin2 , end2 );
+			b2 = BSplineElements< Degree2 >( res , begin2 , BOUNDARY_FREE );
+			for( int d=depth2 ; d<depth1 ; d++ )
+			{
+				BSplineElements< Degree2 > b=b2;
+				b.upSample( b2 );
+				res <<= 1;
+			}
+			b1 = BSplineElements< Degree1 >( res , off1 - ( (off2-begin2)<<(depth1-depth2) ) , BOUNDARY_FREE );
+		}
 	}
+	else
 	{
-		BSplineElements< Degree2 > b;
-		while( depth2<depth ) b=b2 , b.upSample( b2 ) , depth2++;
+		b1 = BSplineElements< Degree1 >( 1<<depth1 , off1 , FEMSignature< FEMSig1 >::BType );
+		b2 = BSplineElements< Degree2 >( 1<<depth2 , off2 , FEMSignature< FEMSig2 >::BType );
+		{
+			BSplineElements< Degree1 > b;
+			while( depth1<depth ) b=b1 , b.upSample( b1 ) , depth1++;
+		}
+		{
+			BSplineElements< Degree2 > b;
+			while( depth2<depth ) b=b2 , b.upSample( b2 ) , depth2++;
+		}
 	}
 
 	BSplineElements< Degree1-D1 > db1;
@@ -279,67 +329,66 @@ double BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::Dot( int d
 		SetBSplineElementIntegrals< _Degree1 , _Degree2 >( integrals );
 		for( int j=0 ; j<=_Degree1 ; j++ ) for( int k=0 ; k<=_Degree2 ; k++ ) _dot += integrals[j][k] * sums[j][k];
 	}
-
 	_dot /= b1.denominator;
 	_dot /= b2.denominator;
 	return ( !D1 && !D2 ) ? _dot / (1<<depth) : _dot * ( 1<<( depth*(D1+D2-1) ) );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< D1 , D2 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< D1 , D2 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth ) // General case: first recurse on the first index (D1-1) via Set2D, then process the row for D1 via Set1D
 {
 	IntegratorSetter< D1-1 , D2 , _D1 , _D2 , Integrator >::Set2D( integrator , depth );
 	IntegratorSetter< D1   , D2 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< D1 , D2 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< D1 , D2 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth ) // General case: first recurse on the second index (D2-1) via Set1D, then set the (D1,D2) entry through _IntegratorSetter::Set
 {
 	IntegratorSetter< D1 , D2-1 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 	_IntegratorSetter< D1 , D2 , _D1 , _D2 >::Set( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D2 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< 0 , D2 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< 0 , D2 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth ) // Specialization for first index 0: ends the recursion on the first index and only processes the row for 0 via Set1D
 {
 	IntegratorSetter< 0 , D2 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D2 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< 0 , D2 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< 0 , D2 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth ) // Specialization for first index 0: recurse on the second index (D2-1), then set the (0,D2) entry through _IntegratorSetter::Set
 {
 	IntegratorSetter< 0 , D2-1 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 	_IntegratorSetter< 0 , D2 , _D1 , _D2 >::Set( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< D1 , 0 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< D1 , 0 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth ) // Specialization for second index 0: recurse on the first index (D1-1) via Set2D, then process the row for D1 via Set1D
 {
 	IntegratorSetter< D1-1 , 0 , _D1 , _D2 , Integrator >::Set2D( integrator , depth );
 	IntegratorSetter< D1   , 0 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< D1 , 0 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< D1 , 0 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth ) // Specialization for second index 0: ends the recursion on the second index and only sets the (D1,0) entry
 {
 	_IntegratorSetter< D1 , 0 , _D1 , _D2 >::Set( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< 0 , 0 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< 0 , 0 , _D1 , _D2 , Integrator >::Set2D( Integrator& integrator , int depth ) // Specialization for (0,0): no further recursion on the first index, just forwards to Set1D for (0,0)
 {
 	IntegratorSetter< 0 , 0 , _D1 , _D2 , Integrator >::Set1D( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int _D1 , unsigned int _D2 , class Integrator >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::IntegratorSetter< 0 , 0 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::IntegratorSetter< 0 , 0 , _D1 , _D2 , Integrator >::Set1D( Integrator& integrator , int depth ) // Specialization for (0,0): base case of both recursions, only sets the (0,0) entry
 {
 	_IntegratorSetter< 0 , 0 , _D1 , _D2 >::Set( integrator , depth );
 }
 
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 , unsigned int _D1 , unsigned int _D2 >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::_IntegratorSetter< D1 , D2 , _D1 , _D2 >::Set( typename FunctionIntegrator::template Integrator< _D1 , _D2 >& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::_IntegratorSetter< D1 , D2 , _D1 , _D2 >::Set( typename FunctionIntegrator::template Integrator< _D1 , _D2 >& integrator , int depth )
 {
 	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart ; j<=BSplineOverlapSizes< Degree1 , Degree2 >::OverlapEnd ; j++ )
 	{
@@ -347,9 +396,9 @@ void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::_IntegratorS
 		integrator._ccIntegrals[D1][D2][i][j-BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart] = Dot< D1 , D2 >( depth , ii , depth , ii+j );
 	}
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 , unsigned int _D1 , unsigned int _D2 >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::_IntegratorSetter< D1 , D2 , _D1 , _D2 >::Set( typename FunctionIntegrator::template ChildIntegrator< _D1 , _D2 >& integrator , int pDepth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::_IntegratorSetter< D1 , D2 , _D1 , _D2 >::Set( typename FunctionIntegrator::template ChildIntegrator< _D1 , _D2 >& integrator , int pDepth )
 {
 	for( int i=0 ; i<IndexSize ; i++ ) for( int j=BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapStart ; j<=BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapEnd ; j++ )
 	{
@@ -358,34 +407,34 @@ void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::_IntegratorS
 	}
 }
 
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::SetIntegrator( typename FunctionIntegrator::template Integrator< D1 , D2 >& integrator , int depth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::SetIntegrator( typename FunctionIntegrator::template Integrator< D1 , D2 >& integrator , int depth ) // Records depth in the integrator, then populates it through IntegratorSetter::Set2D starting from the index pair (D1,D2)
 {
 	integrator._depth = depth;
 	IntegratorSetter< D1 , D2 , D1 , D2 , typename FunctionIntegrator::template Integrator< D1 , D2 > >::Set2D( integrator , depth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 >
-void BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::SetChildIntegrator( typename FunctionIntegrator::template ChildIntegrator< D1 , D2 >& integrator , int parentDepth )
+void BSplineIntegrationData< FEMSig1 , FEMSig2 >::SetChildIntegrator( typename FunctionIntegrator::template ChildIntegrator< D1 , D2 >& integrator , int parentDepth ) // Records parentDepth in the integrator, then populates it through IntegratorSetter::Set2D starting from the index pair (D1,D2)
 {
 	integrator._parentDepth = parentDepth;
 	IntegratorSetter< D1 , D2 , D1 , D2 , typename FunctionIntegrator::template ChildIntegrator< D1 , D2 > >::Set2D( integrator , parentDepth );
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 >
-double BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIntegrator::Integrator< D1 , D2 >::dot( int off1 , int off2 , int d1 , int d2 ) const
+double BSplineIntegrationData< FEMSig1 , FEMSig2 >::FunctionIntegrator::Integrator< D1 , D2 >::dot( int off1 , int off2 , int d1 , int d2 ) const // Returns the cached _ccIntegrals[d1][d2] entry for offsets off1/off2 at depth _depth; d1 and d2 are used as indices without a range check
 {
 	int d = off2-off1;
-	if( BSplineEvaluationData< Degree1 , BType1 >::OutOfBounds( _depth , off1 ) || BSplineEvaluationData< Degree2 , BType2 >::OutOfBounds( _depth , off2 ) || d<BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart || d>BSplineOverlapSizes< Degree1 , Degree2 >::OverlapEnd ) return 0;
+	if( BSplineEvaluationData< FEMSig1 >::OutOfBounds( _depth , off1 ) || BSplineEvaluationData< FEMSig2 >::OutOfBounds( _depth , off2 ) || d<BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart || d>BSplineOverlapSizes< Degree1 , Degree2 >::OverlapEnd ) return 0; // either offset out of bounds, or off2-off1 outside [OverlapStart,OverlapEnd]: report 0
 	return _ccIntegrals[d1][d2][ OffsetToIndex( _depth , off1 ) ][d-BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart];
 }
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
+template< unsigned int FEMSig1 , unsigned int FEMSig2 >
 template< unsigned int D1 , unsigned int D2 >
-double BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIntegrator::ChildIntegrator< D1 , D2 >::dot( int off1 , int off2 , int d1 , int d2 ) const
+double BSplineIntegrationData< FEMSig1 , FEMSig2 >::FunctionIntegrator::ChildIntegrator< D1 , D2 >::dot( int off1 , int off2 , int d1 , int d2 ) const // Returns the cached _pcIntegrals[d1][d2] entry for off1 at depth _parentDepth and off2 at depth _parentDepth+1; d1 and d2 are used as indices without a range check
 {
 	int d = off2-2*off1;
-	if( BSplineEvaluationData< Degree1 , BType1 >::OutOfBounds( _parentDepth , off1 ) || BSplineEvaluationData< Degree2 , BType2 >::OutOfBounds( _parentDepth+1 , off2 ) || d<BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapStart || d>BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapEnd ) return 0;
+	if( BSplineEvaluationData< FEMSig1 >::OutOfBounds( _parentDepth , off1 ) || BSplineEvaluationData< FEMSig2 >::OutOfBounds( _parentDepth+1 , off2 ) || d<BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapStart || d>BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapEnd ) return 0; // either offset out of bounds, or off2-2*off1 outside [ChildOverlapStart,ChildOverlapEnd]: report 0
 	return _pcIntegrals[d1][d2][ OffsetToIndex( _parentDepth , off1 ) ][d-BSplineOverlapSizes< Degree1 , Degree2 >::ChildOverlapStart];
 }
 
@@ -393,12 +442,40 @@ double BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIn
 // BSplineData //
 /////////////////
 #define MODULO( A , B ) ( (A)<0 ? ( (B)-((-(A))%(B)) ) % (B) : (A) % (B) )
-template< int Degree , BoundaryType BType >
-int BSplineData< Degree , BType >::RemapOffset( int depth , int offset , bool& reflect )
+
+template< unsigned int FEMSig , unsigned int D >
+BSplineData< FEMSig , D >::BSplineComponents::BSplineComponents( int depth , int offset ) // Builds, for the function at (depth,offset), one polynomial per covered cell in _polys[d][0] and its successive derivatives up to order D in _polys[d][1..D]
+{
+	static const int _Degree = Degree; // signed copy of Degree for the arithmetic below -- presumably to avoid unsigned wrap-around in -( (_Degree+1)/2 ) + d; TODO confirm Degree's type
+	int res = 1<<depth;
+	BSplineElements< Degree > elements( res , offset , FEMSignature< FEMSig >::BType ); // integer element weights for this function under the signature's boundary type
+
+	// The first index is the position, the second is the element type
+	Polynomial< Degree > components[Degree+1][Degree+1];
+	// Generate the elements that can appear in the base function corresponding to the base function at (depth,offset) = (0,0)
+	for( int d=0 ; d<=Degree ; d++ ) for( int dd=0 ; dd<=Degree ; dd++ ) components[d][dd] = Polynomial< Degree >::BSplineComponent( _Degree-dd ).shift( -( (_Degree+1)/2 ) + d );
+
+	// Now adjust to the desired depth and offset
+	double width = 1. / res;
+	for( int d=0 ; d<=Degree ; d++ ) for( int dd=0 ; dd<=Degree ; dd++ ) components[d][dd] = components[d][dd].scale( width ).shift( width*offset );
+
+	// Now write in the polynomials
+	for( int d=0 ; d<=Degree ; d++ )
+	{
+		int idx = offset + BSplineSupportSizes< Degree >::SupportStart + d; // index of the cell that piece d covers
+		_polys[d][0] = Polynomial< Degree >();
+
+		if( idx>=0 && idx<res ) for( int dd=0 ; dd<=Degree ; dd++ ) _polys[d][0] += components[d][dd] * ( ( double )( elements[idx][dd] ) ) / elements.denominator; // pieces whose cell lies outside [0,res) keep the default-constructed polynomial
+	}
+	for( int d=1 ; d<=D ; d++ ) for( int dd=0 ; dd<=Degree ; dd++ ) _polys[dd][d] = _polys[dd][d-1].derivative(); // _polys[dd][d] is the d-th derivative of piece dd, built incrementally
+}
+
+template< unsigned int FEMSig , unsigned int D >
+int BSplineData< FEMSig , D >::RemapOffset( int depth , int offset , bool& reflect )
 {
 	const int I = ( Degree&1 ) ? 0 : 1;
-	if( BType==BOUNDARY_FREE ){ reflect = false ; return offset; }
-	int dim = BSplineEvaluationData< Degree , BOUNDARY_NEUMANN >::End( depth ) - BSplineEvaluationData< Degree , BOUNDARY_NEUMANN >::Begin( depth );
+	if( FEMSignature< FEMSig >::BType==BOUNDARY_FREE ){ reflect = false ; return offset; }
+	int dim = BSplineEvaluationData< FEMDegreeAndBType< Degree , BOUNDARY_NEUMANN >::Signature >::End( depth ) - BSplineEvaluationData< FEMDegreeAndBType< Degree , BOUNDARY_NEUMANN >::Signature >::Begin( depth );
 	offset = MODULO( offset , 2*(dim-1+I) );
 	reflect = offset>=dim;
 	if( reflect ) return 2*(dim-1+I) - (offset+I);
@@ -406,32 +483,37 @@ int BSplineData< Degree , BType >::RemapOffset( int depth , int offset , bool& r
 }
 #undef MODULO
 
-template< int Degree , BoundaryType BType >
-BSplineData< Degree , BType >::BSplineData( int maxDepth )
+template< unsigned int FEMSig , unsigned int D >
+BSplineData< FEMSig , D >::BSplineData( void ) // Default constructor: leaves the object empty (depth 0, no evaluator array); reset() performs the allocation
 {
-	functionCount = TotalFunctionCount( maxDepth );
-	baseBSplines = NewPointer< typename BSplineEvaluationData< Degree , BType >::BSplineComponents >( functionCount );
-	dBaseBSplines = NewPointer< typename BSplineEvaluationData< Degree , BType >::BSplineComponents >( functionCount );
+	_maxDepth = 0;
+	_evaluators = NullPointer( SparseBSplineEvaluator ); // null so that reset() can tell there is nothing to free
+}
+template< unsigned int FEMSig , unsigned int D >
+void BSplineData< FEMSig , D >::reset( int maxDepth ) // Discards any previously held evaluator array and allocates/initializes one SparseBSplineEvaluator per depth in [0,maxDepth]
+{
+	if( _evaluators ) DeletePointer( _evaluators ); // free the previous array, if any
 
-	for( size_t i=0 ; i<functionCount ; i++ )
-	{
-		int d , off;
-		FactorFunctionIndex( (int)i , d , off );
-		baseBSplines[i] = typename BSplineEvaluationData< Degree , BType >::BSplineComponents( d , off );
-		dBaseBSplines[i] = baseBSplines[i].derivative();
-	}
+	_maxDepth = maxDepth;
+	_evaluators = NewPointer< SparseBSplineEvaluator >( _maxDepth+1 ); // _maxDepth+1 entries: one per depth 0.._maxDepth
+	for( unsigned int d=0 ; d<=_maxDepth ; d++ ) _evaluators[d].init( d ); // NOTE(review): d is unsigned; if _maxDepth is a signed member, a negative maxDepth would wrap this bound -- confirm the member's type
+}
+template< unsigned int FEMSig , unsigned int D >
+BSplineData< FEMSig , D >::BSplineData( int maxDepth ) // Constructs the object and immediately allocates evaluators for depths 0..maxDepth via reset()
+{
+	_evaluators = NullPointer( SparseBSplineEvaluator ); // must be null first: reset() tests _evaluators before freeing it
+	reset( maxDepth );
 }
-template< int Degree , BoundaryType BType >
-BSplineData< Degree , BType >::~BSplineData( void )
+template< unsigned int FEMSig , unsigned int D >
+BSplineData< FEMSig , D >::~BSplineData( void ) // Releases the evaluator array
 {
-	FreePointer(  baseBSplines );
-	FreePointer( dBaseBSplines );
+	DeletePointer( _evaluators ); // NOTE(review): called unconditionally, whereas reset() guards with if( _evaluators ); a default-constructed object reaches here with a null pointer -- confirm DeletePointer tolerates null
 }
 
 /////////////////////
 // BSplineElements //
 /////////////////////
-template< int Degree >
+template< unsigned int Degree >
 BSplineElements< Degree >::BSplineElements( int res , int offset , BoundaryType bType )
 {
 	denominator = 1;
@@ -458,12 +540,12 @@ BSplineElements< Degree >::BSplineElements( int res , int offset , BoundaryType
 		_addPeriodic< true >( _ReflectLeft( offset , res ) , bType==BOUNDARY_DIRICHLET ) , _addPeriodic< false >( _ReflectRight( offset , res ) , bType==BOUNDARY_DIRICHLET );
 	}
 }
-template< int Degree > int BSplineElements< Degree >::_ReflectLeft ( int offset , int res ){ return (Degree&1) ?      -offset :      -1-offset; }
-template< int Degree > int BSplineElements< Degree >::_ReflectRight( int offset , int res ){ return (Degree&1) ? 2*res-offset : 2*res-1-offset; }
-template< int Degree > int BSplineElements< Degree >::_RotateLeft  ( int offset , int res ){ return offset-2*res; }
-template< int Degree > int BSplineElements< Degree >::_RotateRight ( int offset , int res ){ return offset+2*res; }
+template< unsigned int Degree > int BSplineElements< Degree >::_ReflectLeft ( int offset , int res ){ return (Degree&1) ?      -offset :      -1-offset; }
+template< unsigned int Degree > int BSplineElements< Degree >::_ReflectRight( int offset , int res ){ return (Degree&1) ? 2*res-offset : 2*res-1-offset; }
+template< unsigned int Degree > int BSplineElements< Degree >::_RotateLeft  ( int offset , int res ){ return offset-2*res; }
+template< unsigned int Degree > int BSplineElements< Degree >::_RotateRight ( int offset , int res ){ return offset+2*res; }
 
-template< int Degree >
+template< unsigned int Degree >
 template< bool Left >
 void BSplineElements< Degree >::_addPeriodic( int offset , bool negate )
 {
@@ -478,7 +560,7 @@ void BSplineElements< Degree >::_addPeriodic( int offset , bool negate )
 	// If there is a change for additional overlap, give it a go
 	if( set ) _addPeriodic< Left >( Left ? _RotateLeft( offset , res ) : _RotateRight( offset , res ) , negate );
 }
-template< int Degree >
+template< unsigned int Degree >
 void BSplineElements< Degree >::upSample( BSplineElements< Degree >& high ) const
 {
 	int bCoefficients[ BSplineSupportSizes< Degree >::UpSampleSize ];
@@ -507,11 +589,10 @@ void BSplineElements< Degree >::upSample( BSplineElements< Degree >& high ) cons
 	high.denominator = denominator<<Degree;
 }
 
-template< int Degree >
+template< unsigned int Degree >
 template< unsigned int D >
 void BSplineElements< Degree >::differentiate( BSplineElements< Degree-D >& d ) const{ Differentiator< Degree , Degree-D >::Differentiate( *this , d ); }
-
-template< int Degree , int DDegree >
+template< unsigned int Degree , unsigned int DDegree >
 void Differentiator< Degree , DDegree >::Differentiate( const BSplineElements< Degree >& bse , BSplineElements< DDegree >& dbse )
 {
 	BSplineElements< Degree-1 > _dbse;
@@ -525,13 +606,12 @@ void Differentiator< Degree , DDegree >::Differentiate( const BSplineElements< D
 	_dbse.denominator = bse.denominator;
 	return Differentiator< Degree-1 , DDegree >::Differentiate( _dbse , dbse );
 }
-
-template< int Degree >
+template< unsigned int Degree >
 void Differentiator< Degree , Degree >::Differentiate( const BSplineElements< Degree >& bse , BSplineElements< Degree >& dbse ){ dbse = bse; }
 
 // If we were really good, we would implement this integral table to store
 // rational values to improve precision...
-template< int Degree1 , int Degree2 >
+template< unsigned int Degree1 , unsigned int Degree2 >
 void SetBSplineElementIntegrals( double integrals[Degree1+1][Degree2+1] )
 {
 	for( int i=0 ; i<=Degree1 ; i++ )
diff --git a/Src/CmdLineParser.cpp b/Src/CmdLineParser.cpp
deleted file mode 100644
index 9061b4a..0000000
--- a/Src/CmdLineParser.cpp
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include "CmdLineParser.h"
-
-
-#ifdef WIN32
-int strcasecmp(char* c1,char* c2){return _stricmp(c1,c2);}
-#endif
-
-cmdLineReadable::cmdLineReadable(const char* name)
-{
-	set=false;
-	this->name=new char[strlen(name)+1];
-	strcpy(this->name,name);
-}
-cmdLineReadable::~cmdLineReadable(void)
-{
-	if(name) delete[] name;
-	name=NULL;
-}
-int cmdLineReadable::read(char**,int){
-	set=true;
-	return 0;
-}
-void cmdLineReadable::writeValue(char* str)
-{
-	str[0] = 0;
-}
-
-////////////////
-// cmdLineInt //
-////////////////
-cmdLineInt::cmdLineInt(const char* name) : cmdLineReadable(name) {value=0;}
-cmdLineInt::cmdLineInt(const char* name,const int& v) : cmdLineReadable(name) {value=v;}
-int cmdLineInt::read(char** argv,int argc){
-	if(argc>0){
-		value=atoi(argv[0]);
-		set=true;
-		return 1;
-	}
-	else{return 0;}
-}
-void cmdLineInt::writeValue(char* str)
-{
-	sprintf(str,"%d",value);
-}
-
-//////////////////
-// cmdLineFloat //
-//////////////////
-cmdLineFloat::cmdLineFloat(const char* name) : cmdLineReadable(name) {value=0;}
-cmdLineFloat::cmdLineFloat(const char* name, const float& v) : cmdLineReadable(name) {value=v;}
-int cmdLineFloat::read(char** argv,int argc){
-	if(argc>0){
-		value=(float)atof(argv[0]);
-		set=true;
-		return 1;
-	}
-	else{return 0;}
-}
-void cmdLineFloat::writeValue(char* str)
-{
-	sprintf(str,"%f",value);
-}
-
-///////////////////
-// cmdLineString //
-///////////////////
-cmdLineString::cmdLineString(const char* name) : cmdLineReadable(name) {value=NULL;}
-cmdLineString::~cmdLineString(void)
-{
-	if(value)	delete[] value;
-	value=NULL;
-}
-int cmdLineString::read(char** argv,int argc){
-	if(argc>0)
-	{
-		value=new char[strlen(argv[0])+1];
-		strcpy(value,argv[0]);
-		set=true;
-		return 1;
-	}
-	else{return 0;}
-}
-void cmdLineString::writeValue(char* str)
-{
-	sprintf(str,"%s",value);
-}
-
-////////////////////
-// cmdLineStrings //
-////////////////////
-cmdLineStrings::cmdLineStrings(const char* name,int Dim) : cmdLineReadable(name)
-{
-	this->Dim=Dim;
-	values=new char*[Dim];
-	for(int i=0;i<Dim;i++)	values[i]=NULL;
-}
-cmdLineStrings::~cmdLineStrings(void)
-{
-	for(int i=0;i<Dim;i++)
-	{
-		if(values[i])	delete[] values[i];
-		values[i]=NULL;
-	}
-	delete[] values;
-	values=NULL;
-}
-int cmdLineStrings::read(char** argv,int argc)
-{
-	if(argc>=Dim)
-	{
-		for(int i=0;i<Dim;i++)
-		{
-			values[i]=new char[strlen(argv[i])+1];
-			strcpy(values[i],argv[i]);
-		}
-		set=true;
-		return Dim;
-	}
-	else	return 0;
-}
-void cmdLineStrings::writeValue(char* str)
-{
-	char* temp=str;
-	for(int i=0;i<Dim;i++)
-	{
-		sprintf(temp,"%s ",values[i]);
-		temp=str+strlen(str);
-	}
-}
-
-
-char* GetFileExtension(char* fileName){
-	char* fileNameCopy;
-	char* ext=NULL;
-	char* temp;
-
-	fileNameCopy=new char[strlen(fileName)+1];
-	assert(fileNameCopy);
-	strcpy(fileNameCopy,fileName);
-	temp=strtok(fileNameCopy,".");
-	while(temp!=NULL)
-	{
-		if(ext!=NULL){delete[] ext;}
-		ext=new char[strlen(temp)+1];
-		assert(ext);
-		strcpy(ext,temp);
-		temp=strtok(NULL,".");
-	}
-	delete[] fileNameCopy;
-	return ext;
-}
-char* GetLocalFileName(char* fileName){
-	char* fileNameCopy;
-	char* name=NULL;
-	char* temp;
-
-	fileNameCopy=new char[strlen(fileName)+1];
-	assert(fileNameCopy);
-	strcpy(fileNameCopy,fileName);
-	temp=strtok(fileNameCopy,"\\");
-	while(temp!=NULL){
-		if(name!=NULL){delete[] name;}
-		name=new char[strlen(temp)+1];
-		assert(name);
-		strcpy(name,temp);
-		temp=strtok(NULL,"\\");
-	}
-	delete[] fileNameCopy;
-	return name;
-}
-
-void cmdLineParse(int argc, char **argv,int num,cmdLineReadable** readable,int dumpError)
-{
-	int i,j;
-	while (argc > 0)
-	{
-		if (argv[0][0] == '-' && argv[0][1]=='-')
-		{
-			for(i=0;i<num;i++)
-			{
-				if (!strcmp(&argv[0][2],readable[i]->name))
-				{
-					argv++, argc--;
-					j=readable[i]->read(argv,argc);
-					argv+=j,argc-=j;
-					break;
-				}
-			}
-			if(i==num){
-				if(dumpError)
-				{
-					fprintf(stderr, "invalid option: %s\n",*argv);
-					fprintf(stderr, "possible options are:\n");
-					for(i=0;i<num;i++)	fprintf(stderr, "  %s\n",readable[i]->name);
-				}
-				argv++, argc--;
-			}
-		}
-		else
-		{
-			if(dumpError)
-			{
-				fprintf(stderr, "invalid option: %s\n", *argv);
-				fprintf(stderr, "  options must start with a \'--\'\n");
-			}
-			argv++, argc--;
-		}
-	}
-}
-char** ReadWords(const char* fileName,int& cnt)
-{
-	char** names;
-	char temp[500];
-	FILE* fp;
-
-	fp=fopen(fileName,"r");
-	if(!fp){return NULL;}
-	cnt=0;
-	while(fscanf(fp," %s ",temp)==1){cnt++;}
-	fclose(fp);
-
-	names=new char*[cnt];
-	if(!names){return NULL;}
-
-	fp=fopen(fileName,"r");
-	if(!fp){
-		delete[] names;
-		cnt=0;
-		return NULL;
-	}
-	cnt=0;
-	while(fscanf(fp," %s ",temp)==1){
-		names[cnt]=new char[strlen(temp)+1];
-		if(!names){
-			for(int j=0;j<cnt;j++){delete[] names[j];}
-			delete[] names;
-			cnt=0;
-			fclose(fp);
-			return NULL;
-		}
-		strcpy(names[cnt],temp);
-		cnt++;
-	}
-	fclose(fp);
-	return names;
-}
diff --git a/Src/CmdLineParser.h b/Src/CmdLineParser.h
index 6d8b175..a1fbd0e 100644
--- a/Src/CmdLineParser.h
+++ b/Src/CmdLineParser.h
@@ -28,93 +28,79 @@ DAMAGE.
 
 #ifndef CMD_LINE_PARSER_INCLUDED
 #define CMD_LINE_PARSER_INCLUDED
-#include <stdarg.h>
-#include <string.h>
 
+#include <stdarg.h>
+#include <cstring>
+#include <cstdlib>
+#include <string>
+#include <vector>
 
 #ifdef WIN32
-int strcasecmp(char* c1,char* c2);
-#endif
+int strcasecmp( const char* c1 , const char* c2 );
+#endif // WIN32
 
-class cmdLineReadable{
+class cmdLineReadable
+{
 public:
 	bool set;
-	char* name;
-	cmdLineReadable(const char* name);
-	virtual ~cmdLineReadable(void);
-	virtual int read(char** argv,int argc);
-	virtual void writeValue(char* str);
+	char *name;
+	cmdLineReadable( const char *name );
+	virtual ~cmdLineReadable( void );
+	virtual int read( char** argv , int argc );
+	virtual void writeValue( char* str ) const;
 };
 
-class cmdLineInt : public cmdLineReadable {
-public:
-	int value;
-	cmdLineInt(const char* name);
-	cmdLineInt(const char* name,const int& v);
-	int read(char** argv,int argc);
-	void writeValue(char* str);
-};
-template<int Dim>
-class cmdLineIntArray : public cmdLineReadable {
-public:
-	int values[Dim];
-	cmdLineIntArray(const char* name);
-	cmdLineIntArray(const char* name,const int v[Dim]);
-	int read(char** argv,int argc);
-	void writeValue(char* str);
-};
+template< class Type > void cmdLineWriteValue( Type t , char* str );
+template< class Type > void cmdLineCleanUp( Type* t );
+template< class Type > Type cmdLineInitialize( void );
+template< class Type > Type cmdLineCopy( Type t );
+template< class Type > Type cmdLineStringToType( const char* str );
 
-class cmdLineFloat : public cmdLineReadable {
+template< class Type >
+class cmdLineParameter : public cmdLineReadable
+{
 public:
-	float value;
-	cmdLineFloat(const char* name);
-	cmdLineFloat(const char* name,const float& f);
-	int read(char** argv,int argc);
-	void writeValue(char* str);
+	Type value;
+	cmdLineParameter( const char *name );
+	cmdLineParameter( const char *name , Type v );
+	~cmdLineParameter( void );
+	int read( char** argv , int argc );
+	void writeValue( char* str ) const;
+	bool expectsArg( void ) const { return true; }
 };
-template<int Dim>
-class cmdLineFloatArray : public cmdLineReadable {
-public:
-	float values[Dim];
-	cmdLineFloatArray(const char* name);
-	cmdLineFloatArray(const char* name,const float f[Dim]);
-	int read(char** argv,int argc);
-	void writeValue(char* str);
-};
-class cmdLineString : public cmdLineReadable {
-public:
-	char* value;
-	cmdLineString(const char* name);
-	~cmdLineString();
-	int read(char** argv,int argc);
-	void writeValue(char* str);
-};
-class cmdLineStrings : public cmdLineReadable {
-	int Dim;
+
+template< class Type , int Dim >
+class cmdLineParameterArray : public cmdLineReadable
+{
 public:
-	char** values;
-	cmdLineStrings(const char* name,int Dim);
-	~cmdLineStrings(void);
-	int read(char** argv,int argc);
-	void writeValue(char* str);
+	Type values[Dim];
+	cmdLineParameterArray( const char *name, const Type* v=NULL );
+	~cmdLineParameterArray( void );
+	int read( char** argv , int argc );
+	void writeValue( char* str ) const;
+	bool expectsArg( void ) const { return true; }
 };
-template<int Dim>
-class cmdLineStringArray : public cmdLineReadable {
+
+template< class Type >
+class cmdLineParameters : public cmdLineReadable
+{
 public:
-	char* values[Dim];
-	cmdLineStringArray(const char* name);
-	~cmdLineStringArray();
-	int read(char** argv,int argc);
-	void writeValue(char* str);
+	int count;
+	Type *values;
+	cmdLineParameters( const char* name );
+	~cmdLineParameters( void );
+	int read( char** argv , int argc );
+	void writeValue( char* str ) const;
+	bool expectsArg( void ) const { return true; }
 };
 
-// This reads the arguments in argc, matches them against "names" and sets
-// the values of "r" appropriately. Parameters start with "--"
-void cmdLineParse(int argc, char **argv,int num,cmdLineReadable** r,int dumpError=1);
-
-char* GetFileExtension(char* fileName);
-char* GetLocalFileName(char* fileName);
-char** ReadWords(const char* fileName,int& cnt);
+void cmdLineParse( int argc , char **argv, cmdLineReadable** params );
+char* FileExtension( char* fileName );
+char* LocalFileName( char* fileName );
+char* DirectoryName( char* fileName );
+char* GetFileExtension( const char* fileName );
+char* GetLocalFileName( const char* fileName );
+char** ReadWords( const char* fileName , int& cnt );
 
 #include "CmdLineParser.inl"
 #endif // CMD_LINE_PARSER_INCLUDED
diff --git a/Src/CmdLineParser.inl b/Src/CmdLineParser.inl
index eeded68..135668c 100644
--- a/Src/CmdLineParser.inl
+++ b/Src/CmdLineParser.inl
@@ -26,116 +26,275 @@ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF S
 DAMAGE.
 */
 
+#include <cassert>
+#include <string.h>
+
+#if defined( WIN32 ) || defined( _WIN64 )
+inline int strcasecmp( const char* c1 , const char* c2 ){ return _stricmp( c1 , c2 ); }
+#endif // WIN32 || _WIN64
+
+template< > inline void cmdLineCleanUp< int    >( int*    t ){ }	// 'inline': these full specializations are defined in a header-included file
+template< > inline void cmdLineCleanUp< float  >( float*  t ){ }
+template< > inline void cmdLineCleanUp< double >( double* t ){ }
+template< > inline void cmdLineCleanUp< char*  >( char** t ){ if( *t ) free( *t ) ; *t = NULL; }
+template< > inline int    cmdLineInitialize< int    >( void ){ return 0; }
+template< > inline float  cmdLineInitialize< float  >( void ){ return 0.f; }
+template< > inline double cmdLineInitialize< double >( void ){ return 0.; }
+template< > inline char*  cmdLineInitialize< char*  >( void ){ return NULL; }
+template< > inline void cmdLineWriteValue< int    >( int    t , char* str ){ sprintf( str , "%d" , t ); }
+template< > inline void cmdLineWriteValue< float  >( float  t , char* str ){ sprintf( str , "%f" , t ); }
+template< > inline void cmdLineWriteValue< double >( double t , char* str ){ sprintf( str , "%f" , t ); }
+template< > inline void cmdLineWriteValue< char*  >( char*  t , char* str ){ if( t ) sprintf( str , "%s" , t ) ; else str[0]=0; }
+template< > inline int    cmdLineCopy( int    t ){ return t;  }
+template< > inline float  cmdLineCopy( float  t ){ return t;  }
+template< > inline double cmdLineCopy( double t ){ return t;  }
+#if defined( WIN32 ) || defined( _WIN64 )
+template< > inline char*  cmdLineCopy( char* t ){ return t ? _strdup( t ) : NULL; }	// NULL is the initial char* value, so copy it as NULL
+#else // !WIN32 && !_WIN64
+template< > inline char*  cmdLineCopy( char* t ){ return t ? strdup( t ) : NULL; }	// NULL is the initial char* value, so copy it as NULL
+#endif // WIN32 || _WIN64
+template< > inline int    cmdLineStringToType( const char* str ){ return atoi( str ); }
+template< > inline float  cmdLineStringToType( const char* str ){ return float( atof( str ) ); }
+template< > inline double cmdLineStringToType( const char* str ){ return double( atof( str ) ); }
+#if defined( WIN32 ) || defined( _WIN64 )
+template< > inline char*  cmdLineStringToType( const char* str ){ return _strdup( str ); }
+#else // !WIN32 && !_WIN64
+template< > inline char*  cmdLineStringToType( const char* str ){ return  strdup( str ); }
+#endif // WIN32 || _WIN64
+
+
 /////////////////////
-// cmdLineIntArray //
+// cmdLineReadable //
 /////////////////////
-template<int Dim>
-cmdLineIntArray<Dim>::cmdLineIntArray(const char* name) : cmdLineReadable(name)
+#if defined( WIN32 ) || defined( _WIN64 )
+inline cmdLineReadable::cmdLineReadable( const char *name ) : set(false) { this->name = _strdup( name ); }
+#else // !WIN32 && !_WIN64
+inline cmdLineReadable::cmdLineReadable( const char *name ) : set(false) { this->name =  strdup( name ); }
+#endif // WIN32 || _WIN64
+
+inline cmdLineReadable::~cmdLineReadable( void ){ if( name ) free( name ) ; name = NULL; }
+inline int cmdLineReadable::read( char** , int ){ set = true ; return 0; }
+inline void cmdLineReadable::writeValue( char* str ) const { str[0] = 0; }
+
+//////////////////////
+// cmdLineParameter //
+//////////////////////
+template< class Type > cmdLineParameter< Type >::~cmdLineParameter( void ) { cmdLineCleanUp( &value ); }
+template< class Type > cmdLineParameter< Type >::cmdLineParameter( const char *name ) : cmdLineReadable( name ){ value = cmdLineInitialize< Type >(); }
+template< class Type > cmdLineParameter< Type >::cmdLineParameter( const char *name , Type v ) : cmdLineReadable( name ){ value = cmdLineCopy< Type >( v ); }
+template< class Type >
+int cmdLineParameter< Type >::read( char** argv , int argc )
 {
-	for(int i=0;i<Dim;i++)	values[i]=0;
+	if( argc>0 )
+	{
+		cmdLineCleanUp< Type >( &value ) , value = cmdLineStringToType< Type >( argv[0] );
+		set = true;
+		return 1;
+	}
+	else return 0;
 }
-template<int Dim>
-cmdLineIntArray<Dim>::cmdLineIntArray(const char* name,const int v[Dim]) : cmdLineReadable(name)
+template< class Type >
+void cmdLineParameter< Type >::writeValue( char* str ) const { cmdLineWriteValue< Type >( value , str ); }
+
+
+///////////////////////////
+// cmdLineParameterArray //
+///////////////////////////
+template< class Type , int Dim >
+cmdLineParameterArray< Type , Dim >::cmdLineParameterArray( const char *name , const Type* v ) : cmdLineReadable( name )
 {
-	for(int i=0;i<Dim;i++)	values[i]=v[i];
+	if( v ) for( int i=0 ; i<Dim ; i++ ) values[i] = cmdLineCopy< Type >( v[i] );
+	else    for( int i=0 ; i<Dim ; i++ ) values[i] = cmdLineInitialize< Type >();
 }
-template<int Dim>
-int cmdLineIntArray<Dim>::read(char** argv,int argc)
+template< class Type , int Dim >
+cmdLineParameterArray< Type , Dim >::~cmdLineParameterArray( void ){ for( int i=0 ; i<Dim ; i++ ) cmdLineCleanUp< Type >( values+i ); }
+template< class Type , int Dim >
+int cmdLineParameterArray< Type , Dim >::read( char** argv , int argc )
 {
-	if(argc>=Dim)
+	if( argc>=Dim )
 	{
-		for(int i=0;i<Dim;i++)	values[i]=atoi(argv[i]);
-		set=true;
+		for( int i=0 ; i<Dim ; i++ ) cmdLineCleanUp< Type >( values+i ) , values[i] = cmdLineStringToType< Type >( argv[i] );
+		set = true;
 		return Dim;
 	}
-	else{return 0;}
+	else return 0;
 }
-template<int Dim>
-void cmdLineIntArray<Dim>::writeValue(char* str)
+template< class Type , int Dim >
+void cmdLineParameterArray< Type , Dim >::writeValue( char* str ) const
 {
 	char* temp=str;
-	for(int i=0;i<Dim;i++)
+	for( int i=0 ; i<Dim ; i++ )
 	{
-		sprintf(temp,"%d ",values[i]);
-		temp=str+strlen(str);
+		cmdLineWriteValue< Type >( values[i] , temp );
+		strcat( temp , " " ) , temp += strlen( temp );	// separate consecutive values with a space, then advance to the new end of the string
 	}
 }
-
 ///////////////////////
-// cmdLineFloatArray //
+// cmdLineParameters //
 ///////////////////////
-template<int Dim>
-cmdLineFloatArray<Dim>::cmdLineFloatArray(const char* name) : cmdLineReadable(name)
-{
-	for(int i=0;i<Dim;i++)	values[i]=0;
-}
-template<int Dim>
-cmdLineFloatArray<Dim>::cmdLineFloatArray(const char* name,const float f[Dim]) : cmdLineReadable(name)
+template< class Type >
+cmdLineParameters< Type >::cmdLineParameters( const char* name ) : cmdLineReadable( name ) , values(NULL) , count(0) { }
+template< class Type >
+cmdLineParameters< Type >::~cmdLineParameters( void )
 {
-	for(int i=0;i<Dim;i++)	values[i]=f[i];
+	if( values ) delete[] values;
+	values = NULL;
+	count = 0;
 }
-template<int Dim>
-int cmdLineFloatArray<Dim>::read(char** argv,int argc)
+template< class Type >
+int cmdLineParameters< Type >::read( char** argv , int argc )
 {
-	if(argc>=Dim)
+	if( values ) delete[] values;
+	values = NULL;
+
+	if( argc>0 )
 	{
-		for(int i=0;i<Dim;i++)	values[i]=(float)atof(argv[i]);
-		set=true;
-		return Dim;
+		count = atoi(argv[0]);
+		if( count <= 0 || argc <= count ){ count = 0 ; return 1; }	// bad count: consume it, but reset count so writeValue() never indexes the NULL values array
+		values = new Type[count];
+		if( !values ){ count = 0 ; return 0; }
+		for( int i=0 ; i<count ; i++ ) values[i] = cmdLineStringToType< Type >( argv[i+1] );
+		set = true;
+		return count+1;
 	}
-	else{return 0;}
+	else return 0;
 }
-template<int Dim>
-void cmdLineFloatArray<Dim>::writeValue(char* str)
+template< class Type >
+void cmdLineParameters< Type >::writeValue( char* str ) const
 {
 	char* temp=str;
-	for(int i=0;i<Dim;i++)
+	for( int i=0 ; i<count ; i++ )
 	{
-		sprintf(temp,"%f ",values[i]);
-		temp=str+strlen(str);
+		cmdLineWriteValue< Type >( values[i] , temp );
+		strcat( temp , " " ) , temp += strlen( temp );	// separate consecutive values with a space, then advance to the new end of the string
 	}
 }
 
 
-////////////////////////
-// cmdLineStringArray //
-////////////////////////
-template<int Dim>
-cmdLineStringArray<Dim>::cmdLineStringArray(const char* name) : cmdLineReadable(name)
+inline char* FileExtension( char* fileName )
 {
-	for(int i=0;i<Dim;i++)	values[i]=NULL;
+	// Returns a pointer (into fileName) just past the last '.', or fileName itself if there is no '.'
+	char* dot = strrchr( fileName , '.' );
+	return dot ? dot+1 : fileName;
 }
-template<int Dim>
-cmdLineStringArray<Dim>::~cmdLineStringArray(void)
+
+inline char* GetFileExtension( const char* fileName )
 {
-	for(int i=0;i<Dim;i++)
+	char* fileNameCopy;
+	char* ext=NULL;
+	char* temp;
+
+	fileNameCopy=new char[strlen(fileName)+1];
+	assert(fileNameCopy);
+	strcpy(fileNameCopy,fileName);
+	temp=strtok(fileNameCopy,".");
+	while(temp!=NULL)
 	{
-		if(values[i])	delete[] values[i];
-		values[i]=NULL;
+		if(ext!=NULL){delete[] ext;}
+		ext=new char[strlen(temp)+1];
+		assert(ext);
+		strcpy(ext,temp);
+		temp=strtok(NULL,".");
 	}
+	delete[] fileNameCopy;
+	return ext;
 }
-template<int Dim>
-int cmdLineStringArray<Dim>::read(char** argv,int argc)
+inline char* GetLocalFileName( const char* fileName )
 {
-	if(argc>=Dim)
-	{
-		for(int i=0;i<Dim;i++)
+	char* fileNameCopy;
+	char* name=NULL;
+	char* temp;
+
+	fileNameCopy=new char[strlen(fileName)+1];
+	assert(fileNameCopy);
+	strcpy(fileNameCopy,fileName);
+	temp=strtok(fileNameCopy,"\\");
+	while(temp!=NULL){
+		if(name!=NULL){delete[] name;}
+		name=new char[strlen(temp)+1];
+		assert(name);
+		strcpy(name,temp);
+		temp=strtok(NULL,"\\");
+	}
+	delete[] fileNameCopy;
+	return name;
+}
+inline char* LocalFileName( char* fileName )
+{
+	char* temp = fileName;
+	for( int i=0 ; i<(int)strlen(fileName) ; i++ ) if( fileName[i] =='\\' ) temp = &fileName[i+1];
+	return temp;
+}
+inline char* DirectoryName( char* fileName )
+{
+	for( int i=int( strlen(fileName) )-1 ; i>=0 ; i-- )
+		if( fileName[i] =='\\' )
 		{
-			values[i]=new char[strlen(argv[i])+1];
-			strcpy(values[i],argv[i]);
+			fileName[i] = 0;
+			return fileName;
 		}
-		set=true;
-		return Dim;
-	}
-	else{return 0;}
+	fileName[0] = 0;
+	return fileName;
 }
-template<int Dim>
-void cmdLineStringArray<Dim>::writeValue(char* str)
+
+inline void cmdLineParse( int argc , char **argv , cmdLineReadable** params )
 {
-	char* temp=str;
-	for(int i=0;i<Dim;i++)
+	while( argc>0 )
 	{
-		sprintf(temp,"%s ",values[i]);
-		temp=str+strlen(str);
+		if( argv[0][0]=='-' && argv[0][1]=='-' )
+		{
+			cmdLineReadable* readable=NULL;
+			for( int i=0 ; params[i]!=NULL && readable==NULL ; i++ ) if( !strcasecmp( params[i]->name , argv[0]+2 ) ) readable = params[i];
+			if( readable )
+			{
+				int j = readable->read( argv+1 , argc-1 );
+				argv += j , argc -= j;
+			}
+			else
+			{
+				WARN( "Invalid option: %s" , argv[0] );
+				for( int i=0 ; params[i]!=NULL ; i++ ) fprintf( stderr , "\t--%s\n" , params[i]->name );
+			}
+		}
+		else WARN( "Parameter name should be of the form --<name>: %s" , argv[0] );
+		++argv , --argc;
 	}
 }
+
+inline char** ReadWords(const char* fileName,int& cnt)
+{
+	// Returns a new[]-allocated array holding a new[]-allocated copy of every whitespace-separated word in fileName (NULL if it cannot be opened); cnt receives the word count.
+	char** names;
+	char temp[500];
+	FILE* fp;
+	int total=0;
+
+	fp=fopen(fileName,"r");
+	if(!fp){return NULL;}
+	cnt=0;
+	// Pass 1: count the words; the field width stops a long token from overrunning temp
+	while(fscanf(fp," %499s ",temp)==1){total++;}
+	fclose(fp);
+	names=new char*[total];
+	if(!names){return NULL;}
+	fp=fopen(fileName,"r");
+	if(!fp){
+		delete[] names;
+		return NULL;
+	}
+	// Pass 2: copy the words, never storing more than pass 1 counted
+	while(cnt<total && fscanf(fp," %499s ",temp)==1){
+		names[cnt]=new char[strlen(temp)+1];
+		if(!names[cnt]){
+			for(int j=0;j<cnt;j++){delete[] names[j];}
+			delete[] names;
+			cnt=0;
+			fclose(fp);
+			return NULL;
+		}
+		strcpy(names[cnt],temp);
+		cnt++;
+	}
+	fclose(fp);
+	return names;
+}
\ No newline at end of file
diff --git a/Src/EDTInHeat.cpp b/Src/EDTInHeat.cpp
new file mode 100644
index 0000000..40a966a
--- /dev/null
+++ b/Src/EDTInHeat.cpp
@@ -0,0 +1,586 @@
+/*
+Copyright (c) 2016, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#undef FAST_COMPILE				// If enabled, only a single version of the reconstruction code is compiled
+#undef USE_DOUBLE				// If enabled, double-precision is used
+#undef ARRAY_DEBUG				// If enabled, array access is tested for validity
+#define DIMENSION 3				// The dimension of the system
+#define DEFAULT_FEM_DEGREE 1	// The default finite-element degree
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+#include <functional>
+#include "MyMiscellany.h"
+#include "CmdLineParser.h"
+#include "PPolynomial.h"
+#include "FEMTree.h"
+
+MessageWriter messageWriter;	// Shared message sink; main sets its echoSTDOUT flag from --verbose
+
+cmdLineParameter< char* >	// String-valued options (file names)
+	In( "in" ) ,	// input mesh, read with PlyReadPolygons in _Execute
+	Out( "out" ) ,	// output file for the tree and EDT solution (opened "wb")
+	InXForm( "inXForm" ) ,	// text file holding a transform applied to the input vertices
+	OutXForm( "outXForm" );	// text file that receives the inverse of the combined transform
+
+cmdLineReadable	// Value-less switches; only their .set flag is consulted
+	Performance( "performance" ) ,
+	ShowResidual( "showResidual" ) ,
+	ExactInterpolation( "exact" ) ,
+	Verbose( "verbose" );
+
+cmdLineParameter< int >	// Integer options with their default values
+#ifndef FAST_COMPILE
+	Degree( "degree" , DEFAULT_FEM_DEGREE ) ,
+#endif // !FAST_COMPILE
+	GSIterations( "iters" , 8 ) ,
+	Depth( "depth" , 8 ) ,
+	FullDepth( "fullDepth" , 5 ) ,
+	BaseDepth( "baseDepth" , 0 ) ,
+	BaseVCycles( "baseVCycles" , 1 ) ,
+	MaxMemoryGB( "maxMemory" , 0 ) ,	// values <= 0 leave the peak-memory limit unset (see main)
+	Threads( "threads" , omp_get_num_procs() );
+
+cmdLineParameter< float >	// Floating-point options with their default values
+	Scale( "scale" , 2.f ) ,
+	CGSolverAccuracy( "cgAccuracy" , float(1e-3) ) ,
+	DiffusionTime( "diffusion" , 0.0005f ) ,
+	WeightScale( "wScl" , 0.125f ) ,
+	WeightExponent( "wExp" , 6.f ) ,
+	ValueWeight( "valueWeight" , 1e-2f );
+
+cmdLineReadable* params[] =	// NULL-terminated table of every recognized option; passed to cmdLineParse in main and walked in _Execute to echo the options that were set
+{
+#ifndef FAST_COMPILE
+	&Degree ,
+#endif // !FAST_COMPILE
+	&In , &Out , &Depth , &InXForm , &OutXForm ,
+	&Scale , &Verbose , &CGSolverAccuracy ,
+	&ShowResidual ,
+	&ValueWeight , &DiffusionTime ,
+	&Threads ,
+	&FullDepth ,
+	&GSIterations ,
+	&WeightScale , &WeightExponent ,
+	&BaseDepth , &BaseVCycles ,
+	&Performance ,
+	&ExactInterpolation ,
+	&MaxMemoryGB ,
+	NULL
+};
+
+
+void ShowUsage( char* ex )	// Prints the option summary (names and current values) to stdout; ex is the executable name shown on the first line
+{
+	printf( "Usage: %s\n" , ex );
+	printf( "\t --%s <input mesh>\n" , In.name );	// the only option printed without brackets, i.e. the required one
+	printf( "\t[--%s <output EDT solution>]\n" , Out.name );
+#ifndef FAST_COMPILE
+	printf( "\t[--%s <b-spline degree>=%d]\n" , Degree.name , Degree.value );
+#endif // !FAST_COMPILE
+	printf( "\t[--%s <maximum reconstruction depth>=%d]\n" , Depth.name , Depth.value );
+	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
+	printf( "\t[--%s <coarse MG solver depth>=%d]\n" , BaseDepth.name , BaseDepth.value );
+	printf( "\t[--%s <coarse MG solver v-cycles>=%d]\n" , BaseVCycles.name , BaseVCycles.value );
+	printf( "\t[--%s <scale factor>=%f]\n" , Scale.name , Scale.value );
+	printf( "\t[--%s <diffusion time>=%.3e]\n" , DiffusionTime.name , DiffusionTime.value );
+	printf( "\t[--%s <value interpolation weight>=%.3e]\n" , ValueWeight.name , ValueWeight.value );
+	printf( "\t[--%s <iterations>=%d]\n" , GSIterations.name , GSIterations.value );
+	printf( "\t[--%s]\n" , ExactInterpolation.name );
+#ifdef _OPENMP
+	printf( "\t[--%s <num threads>=%d]\n" , Threads.name , Threads.value );
+#endif // _OPENMP
+	printf( "\t[--%s <cg solver accuracy>=%g]\n" , CGSolverAccuracy.name , CGSolverAccuracy.value );
+	printf( "\t[--%s <successive under-relaxation weight>=%f]\n" , WeightScale.name , WeightScale.value );
+	printf( "\t[--%s <successive under-relaxation exponent>=%f]\n" , WeightExponent.name , WeightExponent.value );
+	printf( "\t[--%s <maximum memory (in GB)>=%d]\n" , MaxMemoryGB.name , MaxMemoryGB.value );
+	printf( "\t[--%s]\n" , Performance.name );
+	printf( "\t[--%s]\n" , Verbose.name );	// NOTE(review): inXForm, outXForm and showResidual are accepted (they are in params) but are not listed here
+}
+
+template< unsigned int Dim , class Real >
+struct FEMTreeProfiler	// Reports elapsed time and memory figures for one phase of work on a FEMTree
+{
+	FEMTree< Dim , Real >& tree;
+	double t;	// time stamp taken by start()
+
+	FEMTreeProfiler( FEMTree< Dim , Real >& t ) : tree(t) { ; }	// the parameter t shadows the member t here; only tree is initialized
+	void start( void ){ t = Time() , FEMTree< Dim , Real >::ResetLocalMemoryUsage(); }	// begin a phase: record the time and reset the local memory counter
+	void print( const char* header ) const	// Writes the phase report to stdout, optionally prefixed by header
+	{
+		FEMTree< Dim , Real >::MemoryUsage();
+		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );	// explicit cast: the variadic argument must be a double to match %9.1f
+		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );
+	}
+	void dumpOutput( const char* header ) const	// Same report as print(), routed through the global messageWriter
+	{
+		FEMTree< Dim , Real >::MemoryUsage();
+		if( header ) messageWriter( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );
+		else         messageWriter(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );
+	}
+	void dumpOutput2( std::vector< std::string >& comments , const char* header ) const	// Same report, passed to messageWriter together with the comments list
+	{
+		FEMTree< Dim , Real >::MemoryUsage();
+		if( header ) messageWriter( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );
+		else         messageWriter( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , (double)MemoryInfo::PeakMemoryUsageMB() );
+	}
+};
+
+template< class Real , unsigned int Dim >
+XForm< Real , Dim+1 > GetPointXForm( const std::vector< Point< Real , Dim > >& vertices , Real scaleFactor )	// Builds a translate-then-scale transform from the bounding box of vertices; reads vertices[0] unconditionally, so the list must be non-empty
+{
+	Point< Real , Dim > min , max;
+	min = max = vertices[0];
+	for( int i=0 ; i<vertices.size() ; i++ ) for( int j=0 ; j<Dim ; j++ ) min[j] = std::min< Real >( min[j] , vertices[i][j] ) , max[j] = std::max< Real >( max[j] , vertices[i][j] );	// per-axis bounding box
+	Point< Real , Dim > center = ( max + min ) / 2;
+
+	Real scale = max[0]-min[0];
+	for( int d=1 ; d<Dim ; d++ ) scale = std::max< Real >( scale , max[d]-min[d] );	// largest extent over all axes
+	scale *= scaleFactor;
+	for( int i=0 ; i<Dim ; i++ ) center[i] -= scale/2;	// center now holds the minimum corner of the scaled cube around the box center
+	XForm< Real , Dim+1 > tXForm = XForm< Real , Dim+1 >::Identity() , sXForm = XForm< Real , Dim+1 >::Identity();
+	for( int i=0 ; i<Dim ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(Dim,i) = -center[i];	// NOTE(review): scale is 0 when all vertices coincide, which makes 1./scale infinite
+	return sXForm * tXForm;
+}
+
+template< class Real , unsigned int Dim >
+void Print( const XForm< Real , Dim >& xForm )	// Prints the Dim x Dim entries of xForm to stdout, one line per value of the second index
+{
+	for( int j=0 ; j<Dim ; j++ )
+	{
+		for( int i=0 ; i<Dim ; i++ ) printf( " %f" , xForm(i,j) );	// line j lists xForm(0,j) ... xForm(Dim-1,j)
+		printf( "\n" );
+	}
+}
+
+template< unsigned int Dim , class Real >
+struct ConstraintDual	// Functor passed to the point-interpolation initializers in _Execute as the constraint dual
+{
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( const Point< Real , Dim >& p ) const { return CumulativeDerivativeValues< Real , Dim , 0 >( ); }	// ignores p and returns a default-constructed value
+};
+template< unsigned int Dim , class Real >
+struct SystemDual	// Functor that scales the supplied derivative values by a fixed weight; the point argument is unused
+{
+	Real weight;
+	SystemDual( Real w ) : weight(w){ }
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( Point< Real , Dim > p , const CumulativeDerivativeValues< Real , Dim , 0 >& dValues ) const { return dValues*weight; }
+	CumulativeDerivativeValues< double , Dim , 0 > operator()( Point< Real , Dim > p , const CumulativeDerivativeValues< double , Dim , 0 >& dValues ) const { return dValues * weight; };	// overload for double-valued input; the trailing ';' is an empty declaration
+};
+template< unsigned int Dim >
+struct SystemDual< Dim , double >	// Specialization for Real=double: the two operator() overloads of the primary template would have identical signatures, so only one is declared
+{
+	typedef double Real;
+	Real weight;
+	SystemDual( Real w ) : weight(w){ }
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( Point< Real , Dim > p , const CumulativeDerivativeValues< Real , Dim , 0 >& dValues ) const { return dValues*weight; }	// scales dValues by weight; p is unused
+};
+
+template< unsigned int Dim , class Real , unsigned int FEMSig >
+void _Execute( int argc , char* argv[] )	// Runs the whole pipeline: read the mesh, build and finalize the tree, solve the heat system, evaluate normalized gradients, solve the EDT system, optionally write the result
+{
+	static const unsigned int Degree = FEMSignature< FEMSig >::Degree;
+	typedef typename FEMTree< Dim , Real >::template InterpolationInfo< Real , 0 > InterpolationInfo;
+	std::vector< std::string > comments;
+	messageWriter( comments , "*****************************************\n" );
+	messageWriter( comments , "*****************************************\n" );
+	messageWriter( comments , "** Running EDT in Heat (Version %s) **\n" , VERSION );
+	messageWriter( comments , "*****************************************\n" );
+	messageWriter( comments , "*****************************************\n" );
+
+
+	XForm< Real , Dim+1 > xForm , iXForm;
+	if( InXForm.set )
+	{
+		FILE* fp = fopen( InXForm.value , "r" );
+		if( !fp )
+		{
+			WARN( "Could not open file for reading x-form: %s" , InXForm.value );
+			xForm = XForm< Real , Dim+1 >::Identity();
+		}
+		else
+		{
+			for( int i=0 ; i<Dim+1 ; i++ ) for( int j=0 ; j<Dim+1 ; j++ )	// read exactly the (Dim+1)x(Dim+1) entries of xForm, matching the writer below
+			{
+				float f;
+				if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read xform" );
+				xForm(i,j) = (Real)f;
+			}
+			fclose( fp );
+		}
+	}
+	else xForm = XForm< Real , Dim+1 >::Identity();
+
+	char str[1024];
+	for( int i=0 ; params[i] ; i++ )	// echo every option that was set on the command line
+		if( params[i]->set )
+		{
+			params[i]->writeValue( str );
+			if( strlen( str ) ) messageWriter( comments , "\t--%s %s\n" , params[i]->name , str );
+			else                messageWriter( comments , "\t--%s\n" , params[i]->name );
+		}
+
+	double startTime = Time();
+
+	FEMTree< Dim , Real > tree( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	FEMTreeProfiler< Dim , Real > profiler( tree );
+	if( !In.set )	// the input mesh is required
+	{
+		ShowUsage( argv[0] );
+		return;
+	}
+	
+	std::vector< NodeAndPointSample< Dim , Real > > geometrySamples;
+	std::vector< NodeAndPointSample< Dim , Real > > heatPositions;
+	std::vector< Point< Real , Dim > > heatGradients;
+
+	// Read the mesh into the tree
+	{
+		profiler.start();
+		// Read the mesh
+		std::vector< Point< Real , Dim > > vertices;
+		std::vector< TriangleIndex > triangles;
+		{
+			int file_type;
+			std::vector< PlyVertex< float , Dim > > _vertices;
+			std::vector< std::vector< int > > _polygons;
+			std::vector< std::string > comments;	// shadows the outer comments: receives the comments stored in the ply file
+			if( !PlyReadPolygons( In.value , _vertices , _polygons , PlyVertex< float , Dim >::PlyReadProperties() , PlyVertex< float , Dim >::PlyReadNum , file_type , comments ) )
+				ERROR_OUT( "Failed to read ply file: %s\n" , In.value );
+			vertices.resize( _vertices.size() );
+			for( int i=0 ; i<vertices.size() ; i++ ) for( int d=0 ; d<Dim ; d++ ) vertices[i][d] = _vertices[i].point[d];
+			triangles.resize( _polygons.size() );
+			for( int i=0 ; i<triangles.size() ; i++ ) for( int j=0 ; j<Dim ; j++ ) triangles[i][j] = _polygons[i][j];	// NOTE(review): only the first Dim indices of each polygon are used, and polygons with fewer indices are not rejected
+		}
+		for( int i=0 ; i<vertices.size() ; i++ ) vertices[i] = xForm * vertices[i];
+		XForm< Real , Dim+1 > _xForm = GetPointXForm< Real , Dim >( vertices , (Real)Scale.value );
+		for( int i=0 ; i<vertices.size() ; i++ ) vertices[i] = _xForm * vertices[i];
+		xForm = _xForm * xForm;	// combined transform: user-supplied x-form followed by the bounding-box normalization
+		FEMTreeInitializer< Dim , Real >::Initialize( tree.spaceRoot() , vertices , triangles , Depth.value , geometrySamples , true , tree.nodeAllocator , tree.initializer() );
+		iXForm = xForm.inverse();
+		if( OutXForm.set )
+		{
+			FILE* fp = fopen( OutXForm.value , "w" );
+			if( !fp ) WARN( "Could not open file for writing x-form: %s" , OutXForm.value );
+			else
+			{
+				for( int i=0 ; i<Dim+1 ; i++ )
+				{
+					for( int j=0 ; j<Dim+1 ; j++ ) fprintf( fp , " %f" , (float)iXForm(i,j) );
+					fprintf( fp , "\n" );
+				}
+				fclose( fp );
+			}
+		}
+
+		double area = 0;
+#pragma omp parallel for reduction( + : area )
+		for( int i=0 ; i<triangles.size() ; i++ )
+		{
+			Simplex< Real , Dim , Dim-1 > s;
+			for( int k=0 ; k<Dim ; k++ ) for( int j=0 ; j<Dim ; j++ ) s[k][j] = vertices[ triangles[i][k] ][j];
+			Real a2 = s.squareMeasure();
+			if( a2>0 ) area += sqrt(a2) / 2;
+		}
+		messageWriter( "Input Vertices / Triangle / Samples / Area: %d / %d / %d / %g\n" , (int)vertices.size() , (int)triangles.size() , (int)geometrySamples.size() , area );	// every %d argument is cast to int so the variadic types match the format
+		profiler.dumpOutput2( comments , "# Read input into tree:" );
+	}
+
+	// Thicken the tree around the mesh
+	{
+		profiler.start();
+		typename FEMTree< Dim , Real >::FEMTreeNode** nodes = new typename FEMTree< Dim , Real >::FEMTreeNode*[ geometrySamples.size() ];
+		for( int i=0 ; i<geometrySamples.size() ; i++ ) nodes[i] = geometrySamples[i].node;
+		tree.template thicken< Degree >( nodes , (int)geometrySamples.size() );
+		profiler.dumpOutput2( comments , "#       Thickened tree:" );
+		delete[] nodes;
+	}
+
+	// Finalize the topology of the tree
+	{
+		profiler.start();
+		tree.template finalizeForMultigrid< Degree >( FullDepth.value , typename FEMTree< Dim , Real >::TrivialHasDataFunctor() );
+		profiler.dumpOutput2( comments , "#       Finalized tree:" );
+	}
+
+	messageWriter( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
+	messageWriter( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage())/(1<<20) );
+
+	SparseNodeData< Point< Real , Dim+1 > , IsotropicUIntPack< Dim , FEMTrivialSignature > > leafValues;
+	const double GradientCutOff = 0;
+
+	// Compute the heat solution
+	DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > > heatSolution;
+	DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > > constraints;
+
+	// Add the FEM constraints
+	{
+		profiler.start();
+		constraints = tree.initDenseNodeData( IsotropicUIntPack< Dim , FEMSig >() );
+		DenseNodeData< Point< Real , 1 > , IsotropicUIntPack< Dim , FEMTrivialSignature > > _constraints( tree.nodesSize() );
+		for( int i=0 ; i<geometrySamples.size() ; i++ ) _constraints[ geometrySamples[i].node ][0] = geometrySamples[i].sample.weight * (Real)( 1ll<<(Depth.value*Dim) );	// 64-bit shift: Depth*Dim reaches 31 and beyond for depth >= 11 in 3D, which overflows a plain int shift
+		typename FEMIntegrator::template ScalarConstraint< IsotropicUIntPack< Dim , FEMSig > , IsotropicUIntPack< Dim , 0 > , IsotropicUIntPack< Dim , FEMTrivialSignature > , IsotropicUIntPack< Dim , 0 > > F( {1.} );
+		tree.addFEMConstraints( F , _constraints , constraints , Depth.value );
+		profiler.dumpOutput2( comments , "# Set heat constraints:" );
+	}
+
+	// Solve the linear system
+	{
+		profiler.start();
+		typename FEMTree< Dim , Real >::SolverInfo sInfo;
+		sInfo.cgDepth = 0 , sInfo.cascadic = false , sInfo.iters = GSIterations.value , sInfo.vCycles = 1 , sInfo.cgAccuracy = CGSolverAccuracy.value , sInfo.verbose = Verbose.set , sInfo.showResidual = ShowResidual.set , sInfo.showGlobalResidual = SHOW_GLOBAL_RESIDUAL_NONE , sInfo.sliceBlockSize = 1;
+		sInfo.useSupportWeights = true;
+		sInfo.sorRestrictionFunction  = [&]( Real w , Real ){ return ( Real )( WeightScale.value * pow( w , WeightExponent.value ) ); };
+		sInfo.baseDepth = BaseDepth.value , sInfo.baseVCycles = BaseVCycles.value;	// set before solving so --baseDepth / --baseVCycles actually reach the heat solve, as they do for the EDT solve
+		{
+			typename FEMIntegrator::template System< IsotropicUIntPack< Dim , FEMSig > , IsotropicUIntPack< Dim , 1 > > F( { 1. , (double)DiffusionTime.value } );
+			heatSolution = tree.solveSystem( IsotropicUIntPack< Dim , FEMSig >() , F , constraints , Depth.value , sInfo );
+		}
+		profiler.dumpOutput2( comments , "#   Heat system solved:" );
+	}
+
+	// Evaluate the gradients at the leaves
+	{
+		profiler.start();
+
+		typename FEMTree< Dim , Real >::template MultiThreadedEvaluator< IsotropicUIntPack< Dim , FEMSig > , 0 > evaluator( &tree , heatSolution );
+		typedef typename RegularTreeNode< Dim , FEMTreeNodeData >::template ConstNeighbors< IsotropicUIntPack< Dim , 3 > > OneRingNeighbors;
+		typedef typename RegularTreeNode< Dim , FEMTreeNodeData >::template ConstNeighborKey< IsotropicUIntPack< Dim , 1 > , IsotropicUIntPack< Dim , 1 > > OneRingNeighborKey;
+		std::vector< OneRingNeighborKey > oneRingNeighborKeys( omp_get_max_threads() );	// one neighbor key per thread
+		int treeDepth = tree.tree().maxDepth();
+		for( int i=0 ; i<oneRingNeighborKeys.size() ; i++ ) oneRingNeighborKeys[i].set( treeDepth );
+		DenseNodeData< Real , IsotropicUIntPack< Dim , FEMTrivialSignature > > leafCenterValues = tree.initDenseNodeData( IsotropicUIntPack< Dim , FEMTrivialSignature >() );
+
+#pragma omp parallel for
+		for( int i=tree.nodesBegin(0) ; i<tree.nodesEnd(Depth.value) ; i++ ) if( tree.isValidSpaceNode( tree.node(i) ) )
+		{
+			Point< Real , Dim > center ; Real width;
+			tree.centerAndWidth( i , center , width );
+			leafCenterValues[i] = evaluator.values( center , omp_get_thread_num() )[0];	// cache the heat value at the center of every valid node
+		}
+
+		auto CenterGradient = [&] ( const RegularTreeNode< Dim , FEMTreeNodeData >* leaf , int thread )	// finite-difference gradient of the heat solution at the center of leaf
+		{
+			int d , off[Dim] ; Point< Real , Dim > p ; Real width , _width = (Real)1./(1<<Depth.value);
+			tree.depthAndOffset( leaf , d , off ) , tree.centerAndWidth( leaf->nodeData.nodeIndex , p , width );
+			int res = 1<<d , _res = 1<<Depth.value;
+			Point< Real , Dim > g;
+			unsigned int index1[Dim] , index2[Dim];
+			for( int dd=0 ; dd<Dim ; dd++ ) index1[dd] = index2[dd] = 1;
+			const OneRingNeighbors& neighbors = oneRingNeighborKeys[thread].getNeighbors( leaf );
+			for( int c=0 ; c<Dim ; c++ )
+			{
+				Real value1 , value2;
+				if( off[c]-1>=0  ) index1[c] = 0;	// step backwards along axis c only when that stays inside the grid
+				if( off[c]+1<res ) index2[c] = 2;	// likewise for the forward step
+				const RegularTreeNode< Dim , FEMTreeNodeData >* node1 = neighbors.neighbors().data[ GetWindowIndex( IsotropicUIntPack< Dim , 3 >() , index1 ) ];
+				const RegularTreeNode< Dim , FEMTreeNodeData >* node2 = neighbors.neighbors().data[ GetWindowIndex( IsotropicUIntPack< Dim , 3 >() , index2 ) ];
+				if( d==Depth.value && tree.isValidSpaceNode( node2 ) ) value2 = leafCenterValues[ node2->nodeData.nodeIndex ];
+				else
+				{
+					Point< Real , Dim > delta;
+					delta[c] = ( (int)index2[c]-1 ) * _width;
+					value2 = evaluator.values( p+delta , thread )[0];
+				}
+				if( d==Depth.value && tree.isValidSpaceNode( node1 ) ) value1 = leafCenterValues[ node1->nodeData.nodeIndex ];
+				else
+				{
+					Point< Real , Dim > delta;
+					delta[c] = ( (int)index1[c]-1 ) * _width;
+					value1 = evaluator.values( p+delta , thread )[0];
+				}
+				
+				g[c] = ( value2 - value1 ) / ( (Real)( index2[c] - index1[c] ) );
+
+				index1[c] = index2[c] = 1;
+			}
+
+			return g * _res;
+		};
+
+		for( int i=tree.nodesBegin(0) ; i<tree.nodesEnd(Depth.value) ; i++ ) if( tree.isValidSpaceNode( tree.node(i) ) && !tree.isValidSpaceNode( tree.node(i)->children ) )
+		{
+			RegularTreeNode< Dim , FEMTreeNodeData >* leaf = ( RegularTreeNode< Dim , FEMTreeNodeData >* )tree.node(i);
+			leafValues[leaf] *= 0;	// serial pass touching every leaf entry before the parallel loop looks them up
+		}
+
+#pragma omp parallel for
+		for( int i=tree.nodesBegin(0) ; i<tree.nodesEnd(Depth.value) ; i++ ) if( tree.isValidSpaceNode( tree.node(i) ) && !tree.isValidSpaceNode( tree.node(i)->children ) )
+		{
+			RegularTreeNode< Dim , FEMTreeNodeData >* leaf = ( RegularTreeNode< Dim , FEMTreeNodeData >* )tree.node(i);
+			Point< Real , Dim > g = CenterGradient( leaf , omp_get_thread_num() );
+			Real len = (Real)Length( g );
+			if( len>GradientCutOff ) g /= len;	// normalize; a zero-length gradient is left as is
+			Point< Real , Dim+1 >* leafValue = leafValues(leaf);
+			if( leafValue ) for( int d=0 ; d<Dim ; d++ ) (*leafValue)[d+1] = -g[d];	// store the negated unit gradient in components 1..Dim
+			else ERROR_OUT( "Leaf value doesn't exist" );
+		}
+		profiler.dumpOutput2( comments , "#  Evaluated gradients:" );
+	}
+
+
+	// Compute the EDT
+	{
+		// Evaluate the gradients at the center of the leaf nodes
+		DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > > edtSolution , constraints;
+		InterpolationInfo *valueInfo = NULL;
+
+		// Add the FEM constraints
+		{
+			profiler.start();
+			constraints = tree.initDenseNodeData( IsotropicUIntPack< Dim , FEMSig >() );
+			typename FEMIntegrator::template Constraint< IsotropicUIntPack< Dim , FEMSig > , IsotropicUIntPack< Dim , 1 > , IsotropicUIntPack< Dim , FEMTrivialSignature > , IsotropicUIntPack< Dim , 0 > , Dim+1 > F;
+			typedef IsotropicUIntPack< Dim , 1 > Derivatives1;
+			typedef IsotropicUIntPack< Dim , 0 > Derivatives2;
+			unsigned int derivatives2[Dim];
+			for( int d=0 ; d<Dim ; d++ ) derivatives2[d] = 0;
+			for( int d=0 ; d<Dim ; d++ )
+			{
+				unsigned int derivatives1[Dim];
+				for( int dd=0 ; dd<Dim ; dd++ ) derivatives1[dd] = dd==d ? 1 : 0;
+				F.weights[d+1][TensorDerivatives< Derivatives1 >::Index( derivatives1 )][ TensorDerivatives< Derivatives2 >::Index( derivatives2 )] = 1.;
+			}
+			tree.addFEMConstraints( F , leafValues , constraints , Depth.value );
+			profiler.dumpOutput2( comments , "#  Set EDT constraints:" );
+		}
+
+		// Add the interpolation constraints
+		if( ValueWeight.value>0 )
+		{
+			profiler.start();
+			if( ExactInterpolation.set ) valueInfo = FEMTree< Dim , Real >::template       InitializeExactPointInterpolationInfo< Real , 0 >( tree , geometrySamples , ConstraintDual< Dim , Real >() , SystemDual< Dim , Real >( std::max< Real >( 0 , (Real)ValueWeight.value ) ) , true , false );
+			else                         valueInfo = FEMTree< Dim , Real >::template InitializeApproximatePointInterpolationInfo< Real , 0 >( tree , geometrySamples , ConstraintDual< Dim , Real >() , SystemDual< Dim , Real >( std::max< Real >( 0 , (Real)ValueWeight.value ) ) , true , 0 );
+			tree.addInterpolationConstraints( constraints , Depth.value , *valueInfo );
+			profiler.dumpOutput2( comments , "#Set point constraints:" );
+		}
+
+		// Solve the linear system
+		{
+			profiler.start();
+			typename FEMTree< Dim , Real >::SolverInfo sInfo;
+			sInfo.cgDepth = 0 , sInfo.cascadic = true , sInfo.vCycles = 1 , sInfo.cgAccuracy = CGSolverAccuracy.value , sInfo.verbose = Verbose.set , sInfo.showResidual = ShowResidual.set , sInfo.showGlobalResidual = SHOW_GLOBAL_RESIDUAL_NONE , sInfo.sliceBlockSize = 1;
+			sInfo.iters = GSIterations.value;
+			sInfo.baseDepth = BaseDepth.value , sInfo.baseVCycles = BaseVCycles.value;
+			sInfo.useSupportWeights = true;
+			sInfo.sorRestrictionFunction  = [&]( Real w , Real ){ return (Real)( WeightScale.value * pow( w , WeightExponent.value ) ); }; 
+			typename FEMIntegrator::template System< IsotropicUIntPack< Dim , FEMSig > , IsotropicUIntPack< Dim , 1 > > F( { 0. , 1. } );
+			edtSolution = tree.solveSystem( IsotropicUIntPack< Dim , FEMSig >() , F , constraints , Depth.value , sInfo , valueInfo );
+			profiler.dumpOutput2( comments , "#    EDT system solved:" );
+		}
+		if( valueInfo ) delete valueInfo , valueInfo = NULL;
+
+		{
+			auto GetAverageValueAndError = [&]( const FEMTree< Dim , Real >* tree , const DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > >& coefficients , double& average , double& error )	// weighted mean and weighted RMS of the solution evaluated at the geometry samples
+			{
+				double errorSum = 0 , valueSum = 0 , weightSum = 0;
+				typename FEMTree< Dim , Real >::template MultiThreadedEvaluator< IsotropicUIntPack< Dim , FEMSig > , 0 > evaluator( tree , coefficients );
+#pragma omp parallel for reduction( + : errorSum , valueSum , weightSum )
+				for( int j=0 ; j<geometrySamples.size() ; j++ )
+				{
+					ProjectiveData< Point< Real , Dim > , Real >& sample = geometrySamples[j].sample;
+					Real w = sample.weight;
+					Real value = evaluator.values( sample.data / sample.weight , omp_get_thread_num() , geometrySamples[j].node )[0];
+					errorSum += value * value * w;
+					valueSum += value * w;
+					weightSum += w;
+				}
+				average = valueSum / weightSum , error = sqrt( errorSum / weightSum );
+			};
+			double average , error;
+			GetAverageValueAndError( &tree , edtSolution , average , error );
+			if( Verbose.set ) printf( "Interpolation average / error: %g / %g\n" , average , error );
+#pragma omp parallel for
+			for( int i=tree.nodesBegin(0) ; i<tree.nodesEnd(0) ; i++ ) edtSolution[i] -= (Real)average;	// subtract the sample average from the depth-0 coefficients only
+		}
+
+		if( Out.set )
+		{
+			FILE* fp = fopen( Out.value , "wb" );
+			if( !fp ) ERROR_OUT( "Failed to open file for writing: %s" , Out.value );
+			FEMTree< Dim , Real >::WriteParameter( fp );
+			DenseNodeData< Real , IsotropicUIntPack< Dim , FEMSig > >::WriteSignatures( fp );
+			tree.write( fp );
+			edtSolution.write( fp );
+			fclose( fp );
+		}
+	}
+}
+
+#ifndef FAST_COMPILE
+template< unsigned int Dim , class Real >
+void Execute( int argc , char* argv[] )	// Maps the run-time --degree value onto the matching compile-time _Execute instantiation (BOUNDARY_FREE in every case)
+{
+	switch( Degree.value )
+	{
+		case 1: return _Execute< Dim , Real , FEMDegreeAndBType< 1 , BOUNDARY_FREE >::Signature >( argc , argv );
+		case 2: return _Execute< Dim , Real , FEMDegreeAndBType< 2 , BOUNDARY_FREE >::Signature >( argc , argv );
+		case 3: return _Execute< Dim , Real , FEMDegreeAndBType< 3 , BOUNDARY_FREE >::Signature >( argc , argv );
+		case 4: return _Execute< Dim , Real , FEMDegreeAndBType< 4 , BOUNDARY_FREE >::Signature >( argc , argv );
+		default: ERROR_OUT( "Only B-Splines of degree 1 - 4 are supported" );	// any other degree is reported as an error
+	}
+}
+#endif // !FAST_COMPILE
+int main( int argc , char* argv[] )	// Entry point: parse the options, configure threads / memory limit / verbosity, run the solver, optionally report timing
+{
+	Timer timer;
+#ifdef ARRAY_DEBUG
+	WARN( "Array debugging enabled" );
+#endif // ARRAY_DEBUG
+	cmdLineParse( argc-1 , &argv[1] , params );	// skip argv[0], the executable name
+	omp_set_num_threads( Threads.value > 1 ? Threads.value : 1 );	// never request fewer than one thread
+	if( MaxMemoryGB.value>0 ) SetPeakMemoryMB( MaxMemoryGB.value<<10 );	// <<10 converts GB to MB
+	messageWriter.echoSTDOUT = Verbose.set;
+
+#ifdef USE_DOUBLE
+	typedef double Real;
+#else // !USE_DOUBLE
+	typedef float  Real;
+#endif // USE_DOUBLE
+
+#ifdef FAST_COMPILE
+	static const int Degree = DEFAULT_FEM_DEGREE;
+	static const BoundaryType BType = BOUNDARY_FREE;
+
+	WARN( "Compiled for degree-%d, boundary-%s, %s-precision _only_" , Degree , BoundaryNames[ BType ] , sizeof(Real)==4 ? "single" : "double" );
+	if( BaseDepth.value>FullDepth.value )	// NOTE(review): this clamp exists only in the FAST_COMPILE branch
+	{
+		if( BaseDepth.set ) WARN( "Base depth must be smaller than full depth: %d <= %d" , BaseDepth.value , FullDepth.value );
+		BaseDepth.value = FullDepth.value;
+	}
+	_Execute< DIMENSION , Real , FEMDegreeAndBType< Degree , BType >::Signature >( argc , argv );
+#else // !FAST_COMPILE
+	Execute< DIMENSION , Real >( argc , argv );
+#endif // FAST_COMPILE
+	if( Performance.set )
+	{
+		printf( "Time (Wall/CPU): %.2f / %.2f\n" , timer.wallTime() , timer.cpuTime() );
+		printf( "Peak Memory (MB): %d\n" , MemoryInfo::PeakMemoryUsageMB() );
+	}
+	return EXIT_SUCCESS;
+}
diff --git a/Src/FEMTree.Evaluation.inl b/Src/FEMTree.Evaluation.inl
new file mode 100644
index 0000000..8fd7937
--- /dev/null
+++ b/Src/FEMTree.Evaluation.inl
@@ -0,0 +1,941 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< double , Dim , _PointD > FEMTree< Dim , Real >::_Evaluator< UIntPack< FEMSigs ... > , PointD >::_values( unsigned int d , const int fIdx[Dim] , const int cIdx[Dim] , const _CenterOffset off[Dim] , bool parentChild ) const
+{	// Has _setDValues fill a Dim x (_PointD+1) table for the given depth/indices/offsets and lets Evaluate turn it into the cumulative derivative values.
+	double perAxisValues[Dim][_PointD+1];
+	_setDValues< _PointD >( d , fIdx , cIdx , off , parentChild , perAxisValues );
+	return Evaluate< Dim , double , _PointD >( perAxisValues );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< double , Dim , _PointD > FEMTree< Dim , Real >::_Evaluator< UIntPack< FEMSigs ... > , PointD >::_centerValues( unsigned int d , const int fIdx[Dim] , const int cIdx[Dim] , bool parentChild ) const
+{	// Forwards to _values with the CENTER offset selected along every axis.
+	_CenterOffset centerOff[Dim];
+	for( unsigned int dd=0 ; dd<Dim ; dd++ ) centerOff[dd] = CENTER;
+	return _values< _PointD >( d , fIdx , cIdx , centerOff , parentChild );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< double , Dim , _PointD > FEMTree< Dim , Real >::_Evaluator< UIntPack< FEMSigs ... > , PointD >::_cornerValues( unsigned int d , const int fIdx[Dim] , const int cIdx[Dim] , int corner , bool parentChild ) const
+{	// Forwards to _values; bit dd of `corner` picks the FRONT (bit set) or BACK (bit clear) offset along axis dd.
+	_CenterOffset cornerOff[Dim];
+	for( unsigned int dd=0 ; dd<Dim ; dd++ ) cornerOff[dd] = ( corner & (1<<dd) ) ? FRONT : BACK;
+	return _values< _PointD >( d , fIdx , cIdx , cornerOff , parentChild );
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD >
+void FEMTree< Dim , Real >::_Evaluator< UIntPack< FEMSigs ... > , PointD >::set( LocalDepth maxDepth )
+{	// (Re)allocates the per-depth stencil/evaluator tables for depths 0..maxDepth, fills the child-child (cc) and parent-child (pc) center, corner and boundary-corner stencils, and recreates the point evaluator.
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > CenterSizes;
+	static const unsigned int LeftCenterRadii[] = { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd ... };
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > CornerSizes;	// NOTE(review): identical to CenterSizes (built from SupportSize, not a corner-specific size) -- confirm this is intended
+	static const unsigned int LeftCornerRadii[] = { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd ... };
+	typedef UIntPack< ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::BCornerSize + 1 ) ... > BCornerSizes;
+	static const unsigned int LeftBCornerRadii[] = { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::BCornerEnd ... };
+
+	if( stencilData ) DeletePointer( stencilData );	// Release the tables of any previous call before allocating maxDepth+1 fresh entries
+	stencilData = NewPointer< StencilData >( maxDepth+1 );
+	if( evaluators ) DeletePointer( evaluators );
+	evaluators = NewPointer< Evaluators >( maxDepth+1 );
+	if( childEvaluators ) DeletePointer( childEvaluators );
+	childEvaluators = NewPointer< ChildEvaluators >( maxDepth+1 );
+	_setEvaluators( maxDepth );
+	for( int depth=0 ; depth<=maxDepth ; depth++ )
+	{
+		int center = ( 1<<depth )>>1;	// Reference index 2^depth / 2 (0 at depth 0) around which the stencils are sampled
+		int cIdx[Dim] , fIdx[Dim];
+		for( int d=0 ; d<Dim ; d++ ) cIdx[d] = center;
+
+		// First set the stencils for the current depth
+		{
+			// The center stencil
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , CenterSizes() ,
+				[&]( int d , int i ){ fIdx[d] = center + i - LeftCenterRadii[d]; } ,	// Sets the function index along axis d for window position i
+				[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _centerValues( depth , fIdx , cIdx , false ); } ,	// Stores the value for the current fIdx into the stencil entry
+				stencilData[depth].ccCenterStencil()
+			);
+			// The corner stencil
+			for( int c=0 ; c<(1<<Dim) ; c++ )
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , CornerSizes() ,
+					[&]( int d , int i ){ fIdx[d] = center + i - LeftCornerRadii[d]; } ,
+					[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _cornerValues( depth , fIdx , cIdx , c , false ); } ,
+					stencilData[depth].ccCornerStencil[c]()
+				);
+			// The boundary corner stencil
+			for( int c=0 ; c<(1<<Dim) ; c++ )
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , BCornerSizes() ,
+					[&]( int d , int i ){ fIdx[d] = center + i - LeftBCornerRadii[d]; } ,
+					[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _cornerValues( depth , fIdx , cIdx , c , false ); } ,
+					stencilData[depth].ccBCornerStencil[c]()
+				);
+		}
+
+		// Now set the stencils for the parents (one set per child corner c; function indices are taken around center/2)
+		for( int c=0 ; c<(1<<Dim) ; c++ )
+		{
+			int cIdx[Dim] , fIdx[Dim];	// Deliberately shadow the outer indices: the child index now depends on c
+			for( int d=0 ; d<Dim ; d++ ) cIdx[d] = center + ( (c>>d) & 1 );
+
+			// The center stencil
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , CenterSizes() ,
+				[&]( int d , int i ){ fIdx[d] = center/2 + i - LeftCenterRadii[d]; } ,
+				[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _centerValues( depth , fIdx , cIdx , true ); } ,
+				stencilData[depth].pcCenterStencils[c]()
+			);
+			// The corner stencil
+			for( int cc=0 ; cc<(1<<Dim) ; cc++ )
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , CornerSizes() ,
+					[&]( int d , int i ){ fIdx[d] = center/2 + i - LeftCornerRadii[d]; } ,
+					[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _cornerValues( depth , fIdx , cIdx , cc , true ); } ,
+					stencilData[depth].pcCornerStencils[c][cc]()
+				);
+			// The boundary corner stencil
+			for( int cc=0 ; cc<(1<<Dim) ; cc++ )
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , BCornerSizes() ,
+					[&]( int d , int i ){ fIdx[d] = center/2 + i - LeftBCornerRadii[d]; } ,
+					[&]( CumulativeDerivativeValues< double , Dim , PointD >& p ){ p = _cornerValues( depth , fIdx , cIdx , cc , true ); } ,
+					stencilData[depth].pcBCornerStencils[c][cc]()
+				);
+		}
+	}
+	if( _pointEvaluator ) delete _pointEvaluator;	// Replace the point evaluator of any previous call
+	_pointEvaluator = new PointEvaluator< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , PointD > >( maxDepth );
+}
+
+template< unsigned int Dim , class Real >
+template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >
+CumulativeDerivativeValues< V , Dim , _PointD > FEMTree< Dim , Real >::_getValues( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , Point< Real , Dim > p , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth ) const
+{
+	// Accumulates at p the coefficient-weighted values/derivatives from the node's depth (solution), its parent's depth (coarseSolution) and, when available, the child depth (solution).
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+	if( IsActiveNode< Dim >( node->children ) && _localDepth( node->children )<=maxDepth ) WARN( "getValue assumes leaf node" );
+	CumulativeDerivativeValues< V , Dim , _PointD > values;
+	// Evaluation state shared with the accumulator below; it is re-initialized before each depth is processed
+	PointEvaluatorState< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , _PointD > > state;
+
+#ifdef SHOW_WARNINGS
+#pragma message ( "[WARNING] Nudging evaluation point into the interior" )
+#endif // SHOW_WARNINGS
+	for( int axis=0 ; axis<Dim ; axis++ )
+	{
+		if     ( p[axis]==0 ) p[axis] = (Real)(0.+1e-6);
+		else if( p[axis]==1 ) p[axis] = (Real)(1.-1e-6);
+	}
+	auto AddToValues = [&]( const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors , ConstPointer( V ) coefficients )
+	{
+		ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+		for( unsigned int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+		{
+			LocalDepth nodeDepth ; LocalOffset nodeOffset ; _localDepthAndOffset( nodes[i] , nodeDepth , nodeOffset );
+			CumulativeDerivativeValues< Real , Dim , _PointD > basis = state.template dValues< Real , CumulativeDerivatives< Dim , _PointD > >( nodeOffset );
+			for( int k=0 ; k<CumulativeDerivatives< Dim , _PointD >::Size ; k++ ) values[k] += coefficients[ nodes[i]->nodeData.nodeIndex ] * basis[k];
+		}
+	};
+
+	LocalDepth depth = _localDepth( node );
+	while( GetGhostFlag< Dim >( node ) ){ node = node->parent ; depth--; }	// Walk up until the node is no longer flagged as a ghost
+
+	// Contribution from the node's own depth
+	evaluator._pointEvaluator->initEvaluationState( p , depth , state );
+	AddToValues( neighborKey.neighbors[ node->depth() ] , solution );
+	if( depth>0 )
+	{
+		// Contribution from the parent's depth, read from the coarse solution
+		evaluator._pointEvaluator->initEvaluationState( p , depth-1 , state );
+		AddToValues( neighborKey.neighbors[ node->parent->depth() ] , coarseSolution );
+	}
+	// If there could be finer neighbors whose support overlaps the point
+	if( depth<_maxDepth )
+	{
+		typename FEMTreeNode::template ConstNeighbors< SupportSizes > childNeighbors;
+		int childIdx = 0;
+		Point< Real , Dim > center ; Real width;
+		_centerAndWidth( node , center , width );
+		for( int axis=0 ; axis<Dim ; axis++ ) if( p[axis]>center[axis] ) childIdx |= (1<<axis);	// Select the child on the same side of the center as p
+		if( neighborKey.getChildNeighbors( childIdx , node->depth() , childNeighbors ) )
+		{
+			evaluator._pointEvaluator->initEvaluationState( p , depth+1 , state );
+			AddToValues( childNeighbors , solution );
+		}
+	}
+	return values;
+}
+template< unsigned int Dim , class Real >
+template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >
+CumulativeDerivativeValues< V , Dim , _PointD > FEMTree< Dim , Real >::_getCenterValues( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const
+{
+	// Accumulates the coefficient-weighted values/derivatives for the center of `node`: the node's depth (solution), the parent's depth (coarseSolution) and, if present, the child depth (solution).
+	if( IsActiveNode< Dim >( node->children ) && _localDepth( node->children )<=maxDepth ) ERROR_OUT( "getCenterValues assumes leaf node" );
+	typedef _Evaluator< UIntPack< FEMSigs ... > , PointD > _Evaluator;
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+
+	// isInterior selects the precomputed stencils in evaluator.stencilData; otherwise the values are computed per neighbor through the evaluator
+	CumulativeDerivativeValues< V , Dim , _PointD > values;
+
+	LocalDepth d ; LocalOffset cIdx;
+	_localDepthAndOffset( node , d , cIdx );
+
+	static const int corner = (1<<Dim)-1;	// The corner index with every bit set; used together with child 0 in the finer-level pass below
+
+	static const CornerLoopData< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > loopData;
+	// Adds the contributions of the listed neighbors using the values stored in a precomputed corner stencil
+	auto AddToValuesInterior = [&]
+	(
+		unsigned int size , const unsigned int* indices ,
+		const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors ,
+		const typename _Evaluator::CornerStencil& cornerStencil ,
+		ConstPointer( V ) coefficients
+	)
+	{
+		ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+		ConstPointer( CumulativeDerivativeValues< double , Dim , PointD > ) _values = cornerStencil().data;
+		for( unsigned int i=0 ; i<size ; i++ )
+		{
+			int idx = indices[i];
+			if( IsActiveNode< Dim >( nodes[ idx ] ) ) for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[ idx ]->nodeData.nodeIndex ] * (Real)_values[ idx ][dd];
+		}
+	};
+	// Adds the contributions of the listed neighbors, evaluating each corner value explicitly at depth d / index cIdx
+	auto AddToValuesExterior = [&]
+	(
+		unsigned int size , const unsigned int* indices ,
+		LocalDepth d , LocalOffset cIdx ,
+		const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors ,
+		ConstPointer( V ) coefficients , bool parent
+	)
+	{
+		ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+		for( unsigned int i=0 ; i<size ; i++ )
+		{
+			int idx = indices[i];
+			if( IsActiveNode< Dim >( nodes[ idx ] ) )
+			{
+				LocalDepth _d ; LocalOffset fIdx;
+				this->_localDepthAndOffset( nodes[idx] , _d , fIdx );
+				CumulativeDerivativeValues< double , Dim , _PointD > _values = evaluator.template _cornerValues< _PointD >( d , fIdx , cIdx , corner , parent );
+				for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[ idx ]->nodeData.nodeIndex ] * (Real)_values[dd];
+			}
+		}
+	};
+
+	if( isInterior )
+	{
+		// Same-depth and parent-depth contributions read from the precomputed center stencils
+		auto AddToValues = [&]
+		(
+			const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors ,
+			const typename _Evaluator::CenterStencil& centerStencil ,
+			ConstPointer( V ) coefficients
+		)
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			ConstPointer( CumulativeDerivativeValues< double , Dim , PointD > ) _values = centerStencil.data;
+			for( int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+				for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[i]->nodeData.nodeIndex ] * (Real)_values[i][dd];
+		};
+		AddToValues( neighborKey.neighbors[ node->depth() ] , evaluator.stencilData[d].ccCenterStencil , solution );
+		if( d>0 )
+		{
+			int _corner = int( node - node->parent->children );	// Index of this node among its parent's children
+			AddToValues( neighborKey.neighbors[ node->parent->depth() ] , evaluator.stencilData[d].pcCenterStencils[_corner] , coarseSolution );
+		}
+	}
+	else
+	{
+		// Same-depth and parent-depth contributions evaluated explicitly for every valid neighbor
+		auto AddToValues = [&]( const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors , ConstPointer( V ) coefficients , bool parentChild )
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			for( int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+			{
+				LocalDepth _d ; LocalOffset fIdx;
+				_localDepthAndOffset( nodes[i] , _d , fIdx );
+				const CumulativeDerivativeValues< double , Dim , _PointD >& _values = evaluator.template _centerValues< _PointD >( d , fIdx , cIdx , parentChild );
+				for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[i]->nodeData.nodeIndex ] * (Real)_values[dd];
+			}
+		};
+
+		AddToValues( neighborKey.neighbors[ node->depth() ] , solution , false );
+		if( d>0 ) AddToValues( neighborKey.neighbors[ node->parent->depth() ] , coarseSolution , true );
+	}
+	// If there could be finer neighbors whose support overlaps the point
+	if( d<_maxDepth )
+	{
+		typename FEMTreeNode::template ConstNeighbors< SupportSizes > cNeighbors;
+		if( neighborKey.getChildNeighbors( 0 , node->depth() , cNeighbors ) )
+		{
+			if( isInterior ) AddToValuesInterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , cNeighbors , evaluator.stencilData[d+1].ccCornerStencil[corner] , solution );
+			else
+			{
+				LocalDepth _d=d+1 ; LocalOffset _cIdx;
+				for( int dd=0 ; dd<Dim ; dd++ ) _cIdx[dd] = cIdx[dd]<<1;	// Index of child 0 at the next depth
+				AddToValuesExterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , _d , _cIdx , cNeighbors , solution , false );
+			}
+		}
+	}
+	return values;
+}
+
+template< unsigned int Dim , class Real >
+template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >
+CumulativeDerivativeValues< V , Dim , _PointD > FEMTree< Dim , Real >::_getCornerValues( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , int corner , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const
+{
+	// Accumulates the coefficient-weighted values/derivatives for corner `corner` of `node`: the node's depth (solution), the parent's depth (coarseSolution) and, if present, the child depth (solution).
+	if( IsActiveNode< Dim >( node->children ) && _localDepth( node->children )<=maxDepth ) WARN( "getCornerValues assumes leaf node" );
+	typedef _Evaluator< UIntPack< FEMSigs ... > , PointD > _Evaluator;
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+
+	CumulativeDerivativeValues< V , Dim , _PointD > values;
+	LocalDepth d ; LocalOffset cIdx;
+	_localDepthAndOffset( node , d , cIdx );
+
+	static const CornerLoopData< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > loopData;
+	{
+		auto AddToValuesInterior = [&]	// Uses the values stored in a precomputed corner stencil
+		(
+			unsigned int size , const unsigned int* indices ,
+			const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors ,
+			const typename _Evaluator::CornerStencil& cornerStencil ,
+			ConstPointer( V ) coefficients
+		)
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			ConstPointer( CumulativeDerivativeValues< double , Dim , PointD > ) _values = cornerStencil().data;
+			for( unsigned int i=0 ; i<size ; i++ )
+			{
+				int idx = indices[i];
+				if( IsActiveNode< Dim >( nodes[ idx ] ) ) for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[ idx ]->nodeData.nodeIndex ] * (Real)_values[ idx ][dd];
+			}
+		};
+		auto AddToValuesExterior = [&]	// Evaluates each corner value explicitly at depth d / index cIdx
+		(
+			unsigned int size , const unsigned int* indices ,
+			LocalDepth d , LocalOffset cIdx ,
+			const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors ,
+			ConstPointer( V ) coefficients , bool parent
+		)
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			for( unsigned int i=0 ; i<size ; i++ )
+			{
+				int idx = indices[i];
+				if( IsActiveNode< Dim >( nodes[ idx ] ) )
+				{
+					LocalDepth _d ; LocalOffset fIdx;
+					this->_localDepthAndOffset( nodes[idx] , _d , fIdx );
+					CumulativeDerivativeValues< double , Dim , _PointD > _values = evaluator.template _cornerValues< _PointD >( d , fIdx , cIdx , corner , parent );
+					for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[ idx ]->nodeData.nodeIndex ] * (Real)_values[dd];
+				}
+			}
+		};
+		if( isInterior ) AddToValuesInterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , neighborKey.neighbors[ node->depth() ] , evaluator.stencilData[d].ccCornerStencil[corner] , solution );
+		else             AddToValuesExterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , d , cIdx , neighborKey.neighbors[ node->depth() ] , solution , false );
+		if( d>0 )
+		{
+			int _corner = int( node - node->parent->children );	// Index of this node among its parent's children
+			if( isInterior ) AddToValuesInterior( loopData.pcSize[corner][_corner] , loopData.pcIndices[corner][_corner] , neighborKey.neighbors[ node->parent->depth() ] , evaluator.stencilData[d].pcCornerStencils[_corner][corner] , coarseSolution );
+			else             AddToValuesExterior( loopData.pcSize[corner][_corner] , loopData.pcIndices[corner][_corner] , d , cIdx , neighborKey.neighbors[ node->parent->depth() ] , coarseSolution , true );
+		}
+		// If there could be finer neighbors whose support overlaps the point
+		if( d<_maxDepth )
+		{
+			typename FEMTreeNode::template ConstNeighbors< SupportSizes > cNeighbors;
+			if( neighborKey.getChildNeighbors( corner , node->depth() , cNeighbors ) )
+			{
+				if( isInterior ) AddToValuesInterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , cNeighbors , evaluator.stencilData[d+1].ccCornerStencil[corner] , solution );
+				else
+				{
+					LocalDepth _d=d+1 ; LocalOffset _cIdx;
+					for( int dd=0 ; dd<Dim ; dd++ ) _cIdx[dd] = (cIdx[dd]<<1) | ( (corner&(1<<dd)) ? 1 : 0 );	// Index of the child selected by `corner` at the next depth
+					AddToValuesExterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , _d , _cIdx , cNeighbors , solution , false );
+				}
+			}
+		}
+		return values;
+	}
+}
+template< unsigned int Dim , class Real >
+template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >
+CumulativeDerivativeValues< V , Dim , _PointD > FEMTree< Dim , Real >::_getCornerValues( const ConstCornerSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , int corner , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const
+{
+	// Corner-support-key variant: accumulates the values/derivatives for corner `corner` of `node` using the boundary-corner (BCorner) window sizes and stencils.
+	typedef _Evaluator< UIntPack< FEMSigs ... > , PointD > _Evaluator;
+	typedef UIntPack< ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::BCornerSize + 1 ) ... > BCornerSizes;
+
+	CumulativeDerivativeValues< V , Dim , _PointD > values;
+	LocalDepth d ; LocalOffset cIdx;
+	_localDepthAndOffset( node , d , cIdx );
+
+	static const CornerLoopData< ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::BCornerSize + 1 ) ... > loopData;
+
+	{
+		auto AddToValuesInterior = [&]	// Uses the values stored in a precomputed boundary-corner stencil
+		(
+			unsigned int size , const unsigned int* indices ,
+			const typename FEMTreeNode::template ConstNeighbors< BCornerSizes >& neighbors ,
+			const typename _Evaluator::BCornerStencil& cornerStencil ,
+			ConstPointer( V ) coefficients
+		)
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			ConstPointer( CumulativeDerivativeValues< double , Dim , PointD > ) _values = cornerStencil().data;
+			for( unsigned int i=0 ; i<size ; i++ )
+			{
+				int idx = indices[i];
+				if( IsActiveNode< Dim >( nodes[ idx ] ) ) for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[ idx ]->nodeData.nodeIndex ] * (Real)_values[ idx ][dd];
+			}
+		};
+		auto AddToValuesExterior = [&]	// Evaluates each corner value explicitly at depth d / index cIdx
+		(
+			unsigned int size , const unsigned int* indices ,
+			LocalDepth d , LocalOffset cIdx ,
+			const typename FEMTreeNode::template ConstNeighbors< BCornerSizes >& neighbors ,
+			ConstPointer( V ) coefficients , bool parent
+		)
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			for( unsigned int i=0 ; i<size ; i++ )
+			{
+				int idx = indices[i];
+				if( IsActiveNode< Dim >( nodes[ idx ] ) )
+				{
+					LocalDepth _d ; LocalOffset fIdx;
+					this->_localDepthAndOffset( nodes[idx] , _d , fIdx );
+					CumulativeDerivativeValues< double , Dim , _PointD > _values = evaluator.template _cornerValues< _PointD >( d , fIdx , cIdx , corner , parent );
+					for( int dd=0 ; dd<CumulativeDerivatives< Dim , _PointD >::Size ; dd++ ) values[dd] += coefficients[ nodes[idx]->nodeData.nodeIndex ] * (Real)_values[dd];
+				}
+			}
+		};
+		if( isInterior ) AddToValuesInterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , neighborKey.neighbors[ node->depth() ] , evaluator.stencilData[d].ccBCornerStencil[corner] , solution );
+		else             AddToValuesExterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , d , cIdx , neighborKey.neighbors[ node->depth() ] , solution , false );
+		if( d>0 )
+		{
+			int _corner = int( node - node->parent->children );	// Index of this node among its parent's children
+			if( isInterior ) AddToValuesInterior( loopData.pcSize[corner][_corner] , loopData.pcIndices[corner][_corner] , neighborKey.neighbors[ node->parent->depth() ] , evaluator.stencilData[d].pcBCornerStencils[_corner][corner] , coarseSolution );
+			else             AddToValuesExterior( loopData.pcSize[corner][_corner] , loopData.pcIndices[corner][_corner] , d , cIdx , neighborKey.neighbors[ node->parent->depth() ] , coarseSolution , true );
+		}
+		// If there could be finer neighbors whose support overlaps the point
+		if( d<_maxDepth )
+		{
+			typename FEMTreeNode::template ConstNeighbors< BCornerSizes > cNeighbors;
+			if( neighborKey.getChildNeighbors( corner , node->depth() , cNeighbors ) )
+			{
+				if( isInterior ) AddToValuesInterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , cNeighbors , evaluator.stencilData[d+1].ccBCornerStencil[corner] , solution );
+				else
+				{
+					LocalDepth _d=d+1 ; LocalOffset _cIdx;
+					for( int dd=0 ; dd<Dim ; dd++ ) _cIdx[dd] = (cIdx[dd]<<1) | ( (corner&(1<<dd)) ? 1 : 0 );	// Index of the child selected by `corner` at the next depth
+					AddToValuesExterior( loopData.ccSize[corner] , loopData.ccIndices[corner] , _d , _cIdx , cNeighbors , solution , false );
+				}
+			}
+		}
+	}
+	return values;
+}
+////////////////////////////
+// MultiThreadedEvaluator //
+////////////////////////////
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD , typename T >
+FEMTree< Dim , Real >::_MultiThreadedEvaluator< UIntPack< FEMSigs ... > , PointD , T >::_MultiThreadedEvaluator( const FEMTree< Dim , Real >* tree , const DenseNodeData< T , FEMSignatures >& coefficients , int threads ) : _coefficients( coefficients ) , _tree( tree )
+{	// Sets the validity flags on the tree, allocates one point key and one corner key per thread (at least one), and prepares the coarse coefficients and evaluator tables for the tree's maximum depth.
+	_tree->_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	_threads = std::max< int >( 1 , threads );
+	_pointNeighborKeys.resize( _threads );
+	_cornerNeighborKeys.resize( _threads );
+	_coarseCoefficients = _tree->template coarseCoefficients< T >( _coefficients );
+	_evaluator.set( _tree->_maxDepth );
+	for( size_t k=0 ; k<_pointNeighborKeys.size() ; k++ ) _pointNeighborKeys[k].set( _tree->_localToGlobal( _tree->_maxDepth ) );
+	for( size_t k=0 ; k<_cornerNeighborKeys.size() ; k++ ) _cornerNeighborKeys[k].set( _tree->_localToGlobal( _tree->_maxDepth ) );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD , typename T >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< T , Dim , _PointD > FEMTree< Dim , Real >::_MultiThreadedEvaluator< UIntPack< FEMSigs ... > , PointD , T >::values( Point< Real , Dim > p , int thread , const FEMTreeNode* node )
+{	// Evaluates at point p using the neighbor key of `thread`; when no node is supplied, the containing leaf is looked up through the tree.
+	if( _PointD>PointD ) ERROR_OUT( "Evaluating more derivatives than available: %d <= %d" , _PointD , PointD );
+	const FEMTreeNode* leaf = node ? node : _tree->leaf( p );
+	ConstPointSupportKey< FEMDegrees >& pointKey = _pointNeighborKeys[thread];
+	pointKey.getNeighbors( leaf );
+	return _tree->template _getValues< T , _PointD >( pointKey , leaf , p , _coefficients() , _coarseCoefficients() , _evaluator , _tree->_maxDepth );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD , typename T >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< T , Dim , _PointD > FEMTree< Dim , Real >::_MultiThreadedEvaluator< UIntPack< FEMSigs ... > , PointD , T >::centerValues( const FEMTreeNode* node , int thread )
+{	// Evaluates at the center of `node` using the point key of `thread`; the interior test is made on the parent's depth/offset (NOTE(review): node->parent is used without a null check -- confirm callers never pass the root).
+	if( _PointD>PointD ) ERROR_OUT( "Evaluating more derivatives than available: %d <= %d" , _PointD , PointD );
+	ConstPointSupportKey< FEMDegrees >& pointKey = _pointNeighborKeys[thread];
+	pointKey.getNeighbors( node );
+	LocalDepth parentDepth ; LocalOffset parentOffset ; _tree->_localDepthAndOffset( node->parent , parentDepth , parentOffset );
+	const bool isInterior = BaseFEMIntegrator::IsInteriorlySupported( UIntPack< FEMSigs ... >() , parentDepth , parentOffset );
+	return _tree->template _getCenterValues< T , _PointD >( pointKey , node , _coefficients() , _coarseCoefficients() , _evaluator , _tree->_maxDepth , isInterior );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , unsigned int PointD , typename T >
+template< unsigned int _PointD >
+CumulativeDerivativeValues< T , Dim , _PointD > FEMTree< Dim , Real >::_MultiThreadedEvaluator< UIntPack< FEMSigs ... > , PointD , T >::cornerValues( const FEMTreeNode* node , int corner , int thread )
+{	// Evaluates at corner `corner` of `node` using the corner key of `thread`; the interior test is made on the parent's depth/offset (NOTE(review): node->parent is used without a null check -- confirm callers never pass the root).
+	if( _PointD>PointD ) ERROR_OUT( "Evaluating more derivatives than available: %d <= %d" , _PointD , PointD );
+	ConstCornerSupportKey< FEMDegrees >& cornerKey = _cornerNeighborKeys[thread];
+	cornerKey.getNeighbors( node );
+	LocalDepth parentDepth ; LocalOffset parentOffset ; _tree->_localDepthAndOffset( node->parent , parentDepth , parentOffset );
+	const bool isInterior = BaseFEMIntegrator::IsInteriorlySupported( UIntPack< FEMSigs ... >() , parentDepth , parentOffset );
+	return _tree->template _getCornerValues< T , _PointD >( cornerKey , node , corner , _coefficients() , _coarseCoefficients() , _evaluator , _tree->_maxDepth , isInterior );
+}
+
+
+
+template< unsigned int Dim , class Real >
+template< class V , class Coefficients , unsigned int D , unsigned int ... DataSigs >
+V FEMTree< Dim , Real >::_evaluate( const Coefficients& coefficients , Point< Real , Dim > p , const PointEvaluator< UIntPack< DataSigs ... > , IsotropicUIntPack< Dim , D > >& pointEvaluator , const ConstPointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey ) const
+{
+	// Sums, over every depth held by dataKey, the coefficients of the valid neighbors weighted by their value (no derivatives) at p.
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > SupportSizes;
+	PointEvaluatorState< UIntPack< DataSigs ... > , ZeroUIntPack< Dim > > state;
+	unsigned int derivatives[Dim];
+	memset( derivatives , 0 , sizeof( derivatives ) );	// All-zero derivative orders: only the function value is requested
+	V value = V();
+
+	for( int d=_localToGlobal( 0 ) ; d<=dataKey.depth() ; d++ )
+	{
+		{
+			// The window entry at the SupportEnd index must exist; its local depth initializes the evaluation state
+			const FEMTreeNode* node = dataKey.neighbors[d].neighbors.data[ WindowIndex< SupportSizes , UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportEnd ... > >::Index ];
+			if( !node ) ERROR_OUT( "Point is not centered on a node" );
+			pointEvaluator.initEvaluationState( p , _localDepth( node ) , state );
+		}
+		ConstPointer( FEMTreeNode * const ) nodes = dataKey.neighbors[d].neighbors().data;
+		for( int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+		{
+			// coefficients( node ) yields a pointer; neighbors for which it is null contribute nothing
+			const V* v = coefficients( nodes[i] );
+			if( v )
+			{
+				LocalDepth _d ; LocalOffset off ; _localDepthAndOffset( nodes[i] , _d , off );
+				value += (*v) * (Real)state.value( off , derivatives );
+			}
+		}
+	}
+
+	return value;
+}
+
+// Samples the function with the given coefficients on a regular grid at `depth`; sets res to the samples per axis (2^depth, plus one if primal) and returns a NewPointer-allocated array of res^Dim values.
+template< unsigned int Dim , class Real > template< bool XMajor , class V , unsigned int ... DataSigs >
+Pointer( V ) FEMTree< Dim , Real >::regularGridEvaluate( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , int& res , LocalDepth depth , bool primal ) const
+{
+	if( depth<=0 ) depth = _maxDepth;	// a non-positive depth selects the tree's maximum depth
+	Pointer( V ) _coefficients = regularGridUpSample< XMajor >( coefficients , depth );	// dense coefficient grid; released before returning
+	// Per-axis B-spline index range [begin,end) at this depth, and its extent
+	const int begin[] = { _BSplineBegin< DataSigs >( depth ) ... };
+	const int end  [] = { _BSplineEnd< DataSigs >( depth ) ... };
+	const int dim  [] = { ( _BSplineEnd< DataSigs >( depth ) - _BSplineBegin< DataSigs >( depth ) ) ... };
+
+	res = 1<<depth;
+	if( primal ) res++;
+	size_t cellCount = 1;
+	for( int d=0 ; d<Dim ; d++ ) cellCount *= res;
+	Pointer( V ) values = NewPointer< V >( cellCount );
+	memset( values , 0 , sizeof(V) * cellCount );
+
+	if( primal )
+	{
+		// evaluate at the cell corners
+		typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerSize ... > CornerSizes;
+		typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerEnd ... > CornerEnds;
+
+		EvaluationData::CornerEvaluator* evaluators[] = { ( new typename BSplineEvaluationData< DataSigs >::template CornerEvaluator< 0 >::Evaluator() ) ... };
+		for( int d=0 ; d<Dim ; d++ ) evaluators[d]->set( depth );
+		// Compute the offset from coefficient index to voxel index and the value of the stencil (if the voxel is interior)
+		StaticWindow< long long , UIntPack< ( BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerSize ? BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerSize : 1 ) ... > > offsets;
+		StaticWindow< double    , UIntPack< ( BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerSize ? BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::CornerSize : 1 ) ... > > cornerValues;
+		int dimMultiplier[Dim];	// linear stride of each axis in the coefficient grid (axis 0 fastest if XMajor, last axis fastest otherwise)
+		if( XMajor )
+		{
+			dimMultiplier[0] = 1;
+			for( int d=1 ; d<Dim ; d++ ) dimMultiplier[d] = dimMultiplier[d-1] * dim[d-1];
+		}
+		else
+		{
+			dimMultiplier[Dim-1] = 1;
+			for( int d=Dim-2 ; d>=0 ; d-- ) dimMultiplier[d] = dimMultiplier[d+1] * dim[d+1];
+		}
+		// Fill the stencil once, using the middle index of the grid: per-entry linear offset and product of per-axis weights
+		{
+			int center = ( 1<<depth )>>1;
+			long long offset[Dim+1] ; offset[0] = 0;
+			double upValue[Dim+1] ; upValue[0] = 1;
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , CornerSizes() ,
+				[&]( int d , int i ){ offset[d+1] = offset[d] + ( i - (int)CornerEnds::Values[d] - begin[d] ) * dimMultiplier[d] ; upValue[d+1] = upValue[d] * evaluators[d]->value( center + i - (int)CornerEnds::Values[d] , center , false ); } ,
+				[&]( long long& offsetValue , double& cornerValue ){ offsetValue = offset[Dim] , cornerValue = upValue[Dim]; } ,
+				offsets() , cornerValues()
+			);
+		}
+#pragma omp parallel for
+		for( long long c=0 ; c<(long long)cellCount ; c++ )
+		{
+			V& value = values[c];
+			int idx[Dim];	// per-axis sample index decoded from the linear index c
+			{
+				size_t _c = c;
+				if( XMajor ) for( int d=0 ; d<Dim ; d++ ) idx[      d] = _c % res , _c /= res;
+				else         for( int d=0 ; d<Dim ; d++ ) idx[Dim-1-d] = _c % res , _c /= res;
+			}
+			long long ii = 0;
+			for( int d=0 ; d<Dim ; d++ ) ii += (long long)idx[d] * dimMultiplier[d];	// widen first: the int product can overflow on fine grids
+
+			bool isInterior = true;
+			for( int d=0 ; d<Dim ; d++ ) if( ( idx[d] - (int)CornerEnds::Values[d] )<begin[d] || ( idx[d] - (int)CornerEnds::Values[d] + (int)CornerSizes::Values[d] )>=end[d] ) isInterior = false;
+
+			if( isInterior )	// the whole stencil is in range: reuse the precomputed offsets and weights
+			{
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] This should be modified to support 0-degree elements" )
+#endif // SHOW_WARNINGS
+				ConstPointer( long long ) offsetValues = offsets().data;
+				ConstPointer( double ) _cornerValues = cornerValues().data;
+				for( int i=0 ; i<WindowSize< CornerSizes >::Size ; i++ ) value += _coefficients[ offsetValues[i]+ii ] * (Real)_cornerValues[i];
+			}
+			else	// near the boundary: evaluate the weights per entry and skip entries outside [begin,end)
+			{
+				double upValues[Dim+1] ; upValues[0] = 1;	// Accumulates the product of the weights
+				bool isValid[Dim+1] ; isValid[0] = true;
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , CornerSizes() ,
+					[&]( int d , int i )
+					{
+						int ii = idx[d] + i - (int)CornerEnds::Values[d];
+						if( ii>=begin[d] && ii<end[d] )
+						{
+							upValues[d+1] = upValues[d] * evaluators[d]->value( ii , idx[d] , false );
+							isValid[d+1] = isValid[d];
+						}
+						else isValid[d+1] = false;
+					} ,
+					[&]( long long offsetValue ){ if( isValid[Dim] ) value += _coefficients[ offsetValue + ii ] * (Real)upValues[Dim]; } ,
+					offsets()
+				);
+			}
+		}
+		for( int d=0 ; d<Dim ; d++ ) delete evaluators[d];
+	}
+	else
+	{
+		// evaluate at the cell centers
+		typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > SupportSizes;
+		typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportEnd ... > SupportEnds;
+
+		EvaluationData::CenterEvaluator* evaluators[] = { ( new typename BSplineEvaluationData< DataSigs >::template CenterEvaluator< 0 >::Evaluator() ) ... };
+		for( int d=0 ; d<Dim ; d++ ) evaluators[d]->set( depth );
+		// Compute the offset from coefficient index to voxel index and the value of the stencil (if the voxel is interior)
+		StaticWindow< long long , UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > > offsets;
+		StaticWindow< double    , UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > > centerValues;
+
+		int dimMultiplier[Dim];	// linear stride of each axis in the coefficient grid (axis 0 fastest if XMajor, last axis fastest otherwise)
+		if( XMajor )
+		{
+			dimMultiplier[0] = 1;
+			for( int d=1 ; d<Dim ; d++ ) dimMultiplier[d] = dimMultiplier[d-1] * dim[d-1];
+		}
+		else
+		{
+			dimMultiplier[Dim-1] = 1;
+			for( int d=Dim-2 ; d>=0 ; d-- ) dimMultiplier[d] = dimMultiplier[d+1] * dim[d+1];
+		}
+		// Fill the stencil once, using the middle index of the grid: per-entry linear offset and product of per-axis weights
+		{
+			int center = ( 1<<depth )>>1;
+			long long offset[Dim+1] ; offset[0] = 0;
+			double upValue[Dim+1] ; upValue[0] = 1;
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , SupportSizes() ,
+				[&]( int d , int i ){ offset[d+1] = offset[d] + ( i - (int)SupportEnds::Values[d] - begin[d] ) * dimMultiplier[d] ; upValue[d+1] = upValue[d] * evaluators[d]->value( center + i - (int)SupportEnds::Values[d] , center , false ); } ,
+				[&]( long long& offsetValue , double& centerValue ){ offsetValue = offset[Dim] , centerValue = upValue[Dim]; } ,
+				offsets() , centerValues()
+			);
+		}
+#pragma omp parallel for
+		for( long long c=0 ; c<(long long)cellCount ; c++ )
+		{
+			V& value = values[c];
+			int idx[Dim];	// per-axis sample index decoded from the linear index c
+			{
+				size_t _c = c;
+				if( XMajor ) for( int d=0 ; d<Dim ; d++ ) idx[      d] = _c % res , _c /= res;
+				else         for( int d=0 ; d<Dim ; d++ ) idx[Dim-1-d] = _c % res , _c /= res;
+			}
+			long long ii = 0;
+			for( int d=0 ; d<Dim ; d++ ) ii += (long long)idx[d] * dimMultiplier[d];	// widen first: the int product can overflow on fine grids
+
+			bool isInterior = true;
+			for( int d=0 ; d<Dim ; d++ ) if( ( idx[d] - (int)SupportEnds::Values[d] )<begin[d] || ( idx[d] - (int)SupportEnds::Values[d] + (int)SupportSizes::Values[d] )>=end[d] ) isInterior = false;
+
+			if( isInterior )	// the whole stencil is in range: reuse the precomputed offsets and weights
+			{
+				ConstPointer( long long ) offsetValues = offsets().data;
+				ConstPointer( double ) _centerValues = centerValues().data;
+				for( int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) value += _coefficients[ offsetValues[i] + ii ] * (Real)_centerValues[i];
+			}
+			else	// near the boundary: evaluate the weights per entry and skip entries outside [begin,end)
+			{
+				double upValues[Dim+1] ; upValues[0] = 1;	// Accumulates the product of the weights
+				bool isValid[Dim+1] ; isValid[0] = true;
+				WindowLoop< Dim >::Run
+				(
+					ZeroUIntPack< Dim >() , SupportSizes() ,
+					[&]( int d , int i )
+					{
+						int ii = idx[d] + i - (int)SupportEnds::Values[d];
+						if( ii>=begin[d] && ii<end[d] )
+						{
+							upValues[d+1] = upValues[d] * evaluators[d]->value( ii , idx[d] , false );
+							isValid[d+1] = isValid[d];
+						}
+						else isValid[d+1] = false;
+					} ,
+					[&]( long long offsetValue ){ if( isValid[Dim] ) value += _coefficients[ offsetValue + ii ] * (Real)upValues[Dim]; } ,
+					offsets()
+				);
+			}
+		}
+		for( int d=0 ; d<Dim ; d++ ) delete evaluators[d];
+	}
+	MemoryUsage();
+	DeletePointer( _coefficients );
+
+	return values;
+}
+// Up-samples over the full B-spline index range of the requested depth (a non-positive depth selects _maxDepth).
+template< unsigned int Dim , class Real > template< bool XMajor , class V , unsigned int ... DataSigs >
+Pointer( V ) FEMTree< Dim , Real >::regularGridUpSample( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , LocalDepth depth ) const
+{
+	typedef UIntPack< DataSigs ... > Sigs;
+	const LocalDepth gridDepth = depth>0 ? depth : _maxDepth;
+	int first[Dim] , last[Dim];
+	FEMIntegrator::BSplineBegin( Sigs() , gridDepth , first ) ; FEMIntegrator::BSplineEnd( Sigs() , gridDepth , last );
+	return regularGridUpSample< XMajor >( coefficients , first , last , gridDepth );
+}
+// Builds a dense coefficient grid over [begin,end) at `depth`: starts from the depth-0 node coefficients, then for each finer level copies that level's node coefficients and up-samples the coarser grid into them. Returns a NewPointer-allocated array.
+template< unsigned int Dim , class Real > template< bool XMajor , class V , unsigned int ... DataSigs >
+Pointer( V ) FEMTree< Dim , Real >::regularGridUpSample( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , const int begin[Dim] , const int end[Dim] , LocalDepth depth ) const
+{
+	if( depth<=0 ) depth = _maxDepth;	// a non-positive depth selects the tree's maximum depth
+
+	static const int DownSampleStart[][sizeof...(DataSigs)] = { { BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::DownSampleStart[0] ... } , { BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::DownSampleStart[1] ... } };
+	static const int DownSampleEnd  [][sizeof...(DataSigs)] = { { BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::DownSampleEnd  [0] ... } , { BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::DownSampleEnd  [1] ... } };
+
+	struct GridDimensions	// index range, extent and linear strides of one level's coefficient grid
+	{
+		int begin[Dim] , end[Dim] , dim[Dim];
+		int dimMultiplier[Dim];
+		GridDimensions( void ){ }
+		GridDimensions( const int b[Dim] , const int e[Dim] )
+		{
+			memcpy( begin , b , sizeof(begin) );
+			memcpy( end , e , sizeof(end) );
+			for( int d=0 ; d<Dim ; d++ ) dim[d] = end[d] - begin[d];
+			if( XMajor )
+			{
+				dimMultiplier[0] = 1;
+				for( int d=1 ; d<Dim ; d++ ) dimMultiplier[d] = dimMultiplier[d-1] * dim[d-1];
+			}
+			else
+			{
+				dimMultiplier[Dim-1] = 1;
+				for( int d=Dim-2 ; d>=0 ; d-- ) dimMultiplier[d] = dimMultiplier[d+1] * dim[d+1];
+			}
+		}
+	};
+
+	auto SetCoarseGridDimensions = []( const GridDimensions& fine , GridDimensions& coarse , int lowDepth )	// derives the coarse range from the fine one, clamped to the B-spline range at lowDepth
+	{
+		int begin[Dim] , end[Dim];
+		FEMIntegrator::BSplineBegin( UIntPack< DataSigs ... >() , lowDepth , begin );
+		FEMIntegrator::BSplineEnd  ( UIntPack< DataSigs ... >() , lowDepth , end   );
+		for( int d=0 ; d<Dim ; d++ )
+		{
+			coarse.begin[d] = std::max< int >( begin[d] , (fine.begin[d]>>1) + DownSampleStart[fine.begin[d]&1][d]   );
+			coarse.end  [d] = std::min< int >( end  [d] , (fine.end  [d]>>1) + DownSampleEnd  [fine.end  [d]&1][d]+1 );
+			coarse.dim  [d] = coarse.end[d] - coarse.begin[d];
+		}
+		if( XMajor )
+		{
+			coarse.dimMultiplier[0] = 1;
+			for( int d=1 ; d<Dim ; d++ ) coarse.dimMultiplier[d] = coarse.dimMultiplier[d-1] * coarse.dim[d-1];
+		}
+		else
+		{
+			coarse.dimMultiplier[Dim-1] = 1;
+			for( int d=Dim-2 ; d>=0 ; d-- ) coarse.dimMultiplier[d] = coarse.dimMultiplier[d+1] * coarse.dim[d+1];
+		}
+	};
+	auto InBounds = []( const LocalOffset& off , const GridDimensions& gDim )	// true if off lies inside [begin,end) on every axis
+	{
+		for( int d=0 ; d<Dim ; d++ ) if( off[d]<gDim.begin[d] || off[d]>=gDim.end[d] ) return false;
+		return true;
+	};
+
+	std::vector< GridDimensions > gridDimensions( depth+1 );
+	gridDimensions[depth] = GridDimensions( begin , end );
+	for( int d=depth ; d>0 ; d-- ) SetCoarseGridDimensions( gridDimensions[d] , gridDimensions[d-1] , d-1 );
+
+	// Initialize the coefficients at the coarsest level
+	Pointer( V ) upSampledCoefficients = NullPointer( V );
+	{
+		LocalDepth _depth = 0;
+		size_t count = 1;
+		for( int dd=0 ; dd<Dim ; dd++ ) count *= gridDimensions[_depth].dim[dd];
+		upSampledCoefficients = NewPointer< V >( count );
+		memset( upSampledCoefficients , 0 , sizeof( V ) * count );
+#pragma omp parallel for
+		for( int i=_sNodesBegin(_depth) ; i<_sNodesEnd(_depth) ; i++ ) if( !_outOfBounds( UIntPack< DataSigs ... >() , _sNodes.treeNodes[i] ) )
+		{
+			LocalDepth _d ; LocalOffset _off;
+			_localDepthAndOffset( _sNodes.treeNodes[i] , _d , _off );
+			if( InBounds( _off , gridDimensions[_depth] ) )
+			{
+				size_t idx = 0;	// InBounds guarantees the per-axis differences are non-negative; widen before multiplying so the product cannot overflow int
+				for( int d=0 ; d<Dim ; d++ ) idx += (size_t)gridDimensions[_depth].dimMultiplier[d] * ( _off[d] - gridDimensions[_depth].begin[d] );
+				upSampledCoefficients[idx] = coefficients[i];
+			}
+		}
+	}
+	// Up-sample and add in the existing coefficients
+	for( LocalDepth _depth=1 ; _depth<=depth ; _depth++ )
+	{
+		size_t count = 1;
+		for( int d=0 ; d<Dim ; d++ ) count *= gridDimensions[_depth].dim[d];
+		Pointer( V ) _coefficients = NewPointer< V >( count );
+		memset( _coefficients , 0 , sizeof( V ) * count );
+		if( _depth<=_maxDepth )	// levels finer than the tree have no node coefficients to copy
+#pragma omp parallel for
+			for( int i=_sNodesBegin(_depth) ; i<_sNodesEnd(_depth) ; i++ ) if( !_outOfBounds( UIntPack< DataSigs ... >() , _sNodes.treeNodes[i] ) )
+			{
+				LocalDepth _d ; LocalOffset _off;
+				_localDepthAndOffset( _sNodes.treeNodes[i] , _d , _off );
+				if( InBounds( _off , gridDimensions[_depth] ) )
+				{
+					size_t idx = 0;	// InBounds guarantees the per-axis differences are non-negative; widen before multiplying so the product cannot overflow int
+					for( int d=0 ; d<Dim ; d++ ) idx += (size_t)gridDimensions[_depth].dimMultiplier[d] * ( _off[d] - gridDimensions[_depth].begin[d] );
+					_coefficients[idx] = coefficients[i];
+				}
+			}
+		_RegularGridUpSample< XMajor >( UIntPack< DataSigs ... >() , gridDimensions[_depth-1].begin , gridDimensions[_depth-1].end , gridDimensions[_depth].begin , gridDimensions[_depth].end , _depth , ( ConstPointer(V) )upSampledCoefficients , _coefficients );
+		DeletePointer( upSampledCoefficients );	// the coarser grid is no longer needed once it has been up-sampled
+		upSampledCoefficients = _coefficients;
+	}
+	return upSampledCoefficients;
+}
+// Averages over the unit cube [0,1]^Dim by forwarding to the overload that takes an explicit box.
+template< unsigned int Dim , class Real > template< class V , unsigned int ... DataSigs >
+V FEMTree< Dim , Real >::average( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients ) const
+{
+	Real lo[Dim] , hi[Dim];
+	for( int axis=0 ; axis<Dim ; axis++ ){ lo[axis] = (Real)0. ; hi[axis] = (Real)1.; }
+	return average( coefficients , lo , hi );
+}
+// Averages over the box [begin,end]: sums coefficient * basis-function integral over all valid nodes (one partial sum per thread) and divides by the box volume.
+template< unsigned int Dim , class Real > template< class V , unsigned int ... DataSigs >
+V FEMTree< Dim , Real >::average( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , const Real begin[Dim] , const Real end[Dim] ) const
+{
+	_setFEM1ValidityFlags( UIntPack< DataSigs ... >() );
+	std::vector< V > partialSums( omp_get_max_threads() );	// one accumulator per thread
+	for( size_t t=0 ; t<partialSums.size() ; t++ ) partialSums[t] = {};
+	double boxBegin[Dim] , boxEnd[Dim];
+	for( int a=0 ; a<Dim ; a++ ) boxBegin[a] = begin[a] , boxEnd[a] = end[a];
+	for( int depth=0 ; depth<=_maxDepth ; depth++ )
+	{
+		// Integral over [0,1]^Dim of the basis function at the middle index of this depth; reused for every node reported as interiorly supported
+		int centerOff[Dim];
+		double unitBegin[Dim] , unitEnd[Dim];
+		for( int a=0 ; a<Dim ; a++ ) centerOff[a] = ( 1<<depth )>>1 , unitBegin[a] = 0 , unitEnd[a] = 1;
+		double fullIntegral = FEMIntegrator::Integral( UIntPack< DataSigs ... >() , depth , centerOff , unitBegin , unitEnd );
+#pragma omp parallel for
+		for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+		{
+			int nodeDepth , nodeOff[Dim];
+			_localDepthAndOffset( _sNodes.treeNodes[i] , nodeDepth , nodeOff );
+			if( !BaseFEMIntegrator::IsInteriorlySupported( UIntPack< FEMSignature< DataSigs >::Degree ... >() , nodeDepth , nodeOff , boxBegin , boxEnd ) ) partialSums[ omp_get_thread_num() ] += (V)( coefficients[i] * (Real)FEMIntegrator::Integral( UIntPack< DataSigs ... >() , nodeDepth , nodeOff , boxBegin , boxEnd ) );
+			else                                                                                                                                            partialSums[ omp_get_thread_num() ] += (V)( coefficients[i] * (Real)fullIntegral );
+		}
+	}
+	V total = {};
+	for( size_t t=0 ; t<partialSums.size() ; t++ ) total += partialSums[t];
+	Real volume = (Real)1.;
+	for( int a=0 ; a<Dim ; a++ ) volume *= end[a] - begin[a];
+	return total / volume;
+}
+
+// For each valid space node that has no active children (or lies at maxDepth), stores the result of _getCenterValues in a per-node sparse map.
+template< unsigned int Dim , class Real > template< unsigned int PointD , unsigned int ... FEMSigs >
+SparseNodeData< CumulativeDerivativeValues< Real , Dim , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > FEMTree< Dim , Real >::leafValues( const DenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients , int maxDepth ) const
+{
+	typedef ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > > PointKey;
+	if( maxDepth<0 ) maxDepth = _maxDepth;	// a negative depth selects the tree's maximum depth
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	SparseNodeData< CumulativeDerivativeValues< Real , Dim , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > values;
+	DenseNodeData< Real , UIntPack< FEMSigs ... > > coarse = coarseCoefficients< Real >( coefficients );
+	_Evaluator< UIntPack< FEMSigs ... > , PointD > evaluator;
+	evaluator.set( maxDepth );
+	for( LocalDepth depth=maxDepth ; depth>=0 ; depth-- )
+	{
+		std::vector< PointKey > keys( omp_get_max_threads() );
+		for( size_t k=0 ; k<keys.size() ; k++ ) keys[k].set( _localToGlobal( depth ) );
+		for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ )
+		{
+			if( !_isValidSpaceNode( _sNodes.treeNodes[i] ) ) continue;
+			PointKey& key = keys[ omp_get_thread_num() ];
+			FEMTreeNode* node = _sNodes.treeNodes[i];
+			if( IsActiveNode< Dim >( node->children ) && depth!=maxDepth ) continue;	// only leaves, or nodes at the depth cap
+			key.getNeighbors( node );
+			bool interiorParent = _isInteriorlySupported( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , node->parent );
+			values[ node ] = _getCenterValues< Real , PointD >( key , node , coefficients() , coarse() , evaluator , maxDepth , interiorParent );
+		}
+	}
+	return values;
+}
diff --git a/Src/FEMTree.Initialize.inl b/Src/FEMTree.Initialize.inl
new file mode 100644
index 0000000..21237fd
--- /dev/null
+++ b/Src/FEMTree.Initialize.inl
@@ -0,0 +1,591 @@
+/*
+Copyright (c) 2016, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+////////////////////////
+// FEMTreeInitializer //
+////////////////////////
+// Refines `node` recursively while it is shallower than maxDepth and Refine( depth , offset ) returns true; returns the number of nodes created.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& node , int maxDepth , std::function< bool ( int , int[] ) > Refine , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	int count = 0 , d , off[Dim];	// the offset buffer is sized by Dim rather than a fixed 3 so it matches the tree dimension
+	node.depthAndOffset( d , off );
+	if( node.depth()<maxDepth && Refine( d , off ) )
+	{
+		node.initChildren( nodeAllocator , NodeInitializer ) , count += 1<<Dim;	// 2^Dim children were just created
+		for( int c=0 ; c<(1<<Dim) ; c++ ) count += Initialize( node.children[c] , maxDepth , Refine , nodeAllocator , NodeInitializer );
+	}
+	return count;
+}
+
+// Reads all points from the stream; each point inside [0,1]^Dim is accumulated (with weight 1) into the sample of its depth-maxDepth node. Returns the number of points added.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , InputPointStream< Real , Dim >& pointStream , int maxDepth , std::vector< PointSample >& samplePoints , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	// Walks from `root` down to the depth-maxDepth node containing p, creating children on the way; NULL if p is outside the unit cube
+	auto Leaf = [&]( FEMTreeNode& root , Point< Real , Dim > p , int maxDepth )
+	{
+		for( int c=0 ; c<Dim ; c++ ) if( p[c]<0 || p[c]>1 ) return (FEMTreeNode*)NULL;
+		Point< Real , Dim > cellCenter;
+		for( int c=0 ; c<Dim ; c++ ) cellCenter[c] = (Real)0.5;
+		Real cellWidth = Real(1.0);
+		FEMTreeNode* cur = &root;
+		for( int depth=0 ; depth<maxDepth ; depth++ )
+		{
+			if( !cur->children ) cur->initChildren( nodeAllocator , NodeInitializer );
+			int child = FEMTreeNode::ChildIndex( cellCenter , p );
+			cur = cur->children + child;
+			cellWidth /= 2;
+			for( int c=0 ; c<Dim ; c++ )
+				if( (child>>c) & 1 ) cellCenter[c] += cellWidth/2;
+				else                 cellCenter[c] -= cellWidth/2;
+		}
+		return cur;
+	};
+
+	// Add the point data
+	int rejected = 0 , accepted = 0;
+	{
+		std::vector< int > sampleOfNode;	// node index -> index into samplePoints, -1 if the node has no sample yet
+		Point< Real , Dim > pt;
+		while( pointStream.nextPoint( pt ) )
+		{
+			Real weight = (Real)1.;
+			FEMTreeNode* leaf = Leaf( root , pt , maxDepth );
+			if( !leaf ){ rejected++ ; continue; }
+			int nodeIndex = leaf->nodeData.nodeIndex;
+			if( nodeIndex>=sampleOfNode.size() ) sampleOfNode.resize( nodeIndex+1 , -1 );
+			int& slot = sampleOfNode[ nodeIndex ];
+			if( slot==-1 )
+			{
+				slot = (int)samplePoints.size();
+				samplePoints.resize( slot+1 );
+				samplePoints[slot].node = leaf;
+			}
+			samplePoints[slot].sample += ProjectiveData< Point< Real , Dim > , Real >( pt*weight , weight );
+			accepted++;
+		}
+		pointStream.reset();
+	}
+	if( rejected ) WARN( "Found out-of-bound points: %d" , rejected );
+	FEMTree< Dim , Real >::MemoryUsage();
+	return accepted;
+}
+
+// Reads (point,data) pairs from the stream; ProcessData supplies each pair's weight (non-positive => skipped), and accepted pairs are stored per node (merged if mergeNodeSamples) or per point. Returns the number added.
+template< unsigned int Dim , class Real >
+template< class Data >
+int FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , InputPointStreamWithData< Real , Dim , Data >& pointStream , int maxDepth , std::vector< PointSample >& samplePoints , std::vector< Data >& sampleData , bool mergeNodeSamples , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer , std::function< Real ( const Point< Real , Dim >& , Data& ) > ProcessData )
+{
+	// Walks from `root` down to the depth-maxDepth node containing p, creating children on the way; NULL if p is outside the unit cube
+	auto Leaf = [&]( FEMTreeNode& root , Point< Real , Dim > p , int maxDepth )
+	{
+		for( int c=0 ; c<Dim ; c++ ) if( p[c]<0 || p[c]>1 ) return (FEMTreeNode*)NULL;
+		Point< Real , Dim > cellCenter;
+		for( int c=0 ; c<Dim ; c++ ) cellCenter[c] = (Real)0.5;
+		Real cellWidth = Real(1.0);
+		FEMTreeNode* cur = &root;
+		for( int depth=0 ; depth<maxDepth ; depth++ )
+		{
+			if( !cur->children ) cur->initChildren( nodeAllocator , NodeInitializer );
+			int child = FEMTreeNode::ChildIndex( cellCenter , p );
+			cur = cur->children + child;
+			cellWidth /= 2;
+			for( int c=0 ; c<Dim ; c++ )
+				if( (child>>c) & 1 ) cellCenter[c] += cellWidth/2;
+				else                 cellCenter[c] -= cellWidth/2;
+		}
+		return cur;
+	};
+
+	// Add the point data
+	int rejected = 0 , invalid = 0 , accepted = 0;
+	{
+		std::vector< int > sampleOfNode;	// node index -> index into samplePoints, -1 if the node has no sample yet
+		Point< Real , Dim > pt;
+		Data datum;
+
+		while( pointStream.nextPoint( pt , datum ) )
+		{
+			Real weight = ProcessData( pt , datum );
+			if( weight<=0 ){ invalid++ ; continue; }
+			FEMTreeNode* leaf = Leaf( root , pt , maxDepth );
+			if( !leaf ){ rejected++ ; continue; }
+			int nodeIndex = leaf->nodeData.nodeIndex;
+			if( !mergeNodeSamples )
+			{
+				int slot = (int)samplePoints.size();
+				samplePoints.resize( slot+1 ) , sampleData.resize( slot+1 );
+				samplePoints[slot].node = leaf;
+				samplePoints[slot].sample = ProjectiveData< Point< Real , Dim > , Real >( pt*weight , weight );
+				sampleData[ slot ] = datum*weight;
+			}
+			else
+			{
+				if( nodeIndex>=sampleOfNode.size() ) sampleOfNode.resize( nodeIndex+1 , -1 );
+				int& slot = sampleOfNode[ nodeIndex ];
+				if( slot==-1 )
+				{
+					slot = (int)samplePoints.size();
+					samplePoints.resize( slot+1 );
+					samplePoints[slot].node = leaf;
+					sampleData.resize( slot+1 );
+				}
+				samplePoints[slot].sample += ProjectiveData< Point< Real , Dim > , Real >( pt*weight , weight );
+				sampleData[ slot ] += datum*weight;
+			}
+			accepted++;
+		}
+		pointStream.reset();
+	}
+	if( rejected ) WARN( "Found out-of-bound points: %d" , rejected );
+	if( invalid  ) WARN( "Found bad data: %d" , invalid );
+	FEMTree< Dim , Real >::MemoryUsage();
+	return accepted;
+}
+// Inserts every simplex of the mesh into the tree in parallel, appending to `samples`; with mergeNodeSamples a node-index map is passed along so samples are looked up per node.
+template< unsigned int Dim , class Real >
+void FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , const std::vector< Point< Real , Dim > >& vertices , const std::vector< SimplexIndex< Dim-1 > >& simplices , int maxDepth , std::vector< PointSample >& samples , bool mergeNodeSamples , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	std::vector< int > nodeToIndexMap;
+	std::vector< int >* indexMap = mergeNodeSamples ? &nodeToIndexMap : NULL;
+#pragma omp parallel for
+	for( int sIdx=0 ; sIdx<simplices.size() ; sIdx++ )
+	{
+		Simplex< Real , Dim , Dim-1 > simplex;
+		for( int v=0 ; v<Dim ; v++ ) simplex[v] = vertices[ simplices[sIdx][v] ];
+		_AddSimplex( root , simplex , maxDepth , samples , indexMap , nodeAllocator , NodeInitializer );
+	}
+	FEMTree< Dim , Real >::MemoryUsage();
+}
+
+// Clips s to the unit cube, then hands each clipped piece to the node-based overload, starting at the finest node (depth <= maxDepth) whose grid cell holds all of the piece's vertices. Returns the summed count of those calls.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::_AddSimplex( FEMTreeNode& root , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< PointSample >& samples , std::vector< int >* nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	std::vector< Simplex< Real , Dim , Dim-1 > > subSimplices;
+	subSimplices.push_back( s );
+
+	// Clip the simplex to the unit cube
+	{
+		for( int d=0 ; d<Dim ; d++ )
+		{
+			Point< Real , Dim > n;
+			n[d] = 1;
+			{
+				std::vector< Simplex< Real , Dim , Dim-1 > > back , front;
+				for( int i=0 ; i<subSimplices.size() ; i++ ) subSimplices[i].split( n , 0 , back , front );
+				subSimplices = front;	// keep the pieces on the front side of the plane x_d = 0
+			}
+			{
+				std::vector< Simplex< Real , Dim , Dim-1 > > back , front;
+				for( int i=0 ; i<subSimplices.size() ; i++ ) subSimplices[i].split( n , 1 , back , front );
+				subSimplices = back;	// keep the pieces on the back side of the plane x_d = 1
+			}
+		}
+	}
+
+	struct RegularGridIndex
+	{
+		int idx[Dim];
+		bool operator != ( const RegularGridIndex& i ) const
+		{
+			for( int d=0 ; d<Dim ; d++ ) if( idx[d]!=i.idx[d] ) return true;
+			return false;
+		}
+	};
+
+	auto Leaf = [&]( Point< Real , Dim > p , int maxDepth )	// node at depth maxDepth containing p (children created as needed), or NULL if p is outside the unit cube
+	{
+		for( int d=0 ; d<Dim ; d++ ) if( p[d]<0 || p[d]>1 ) return (FEMTreeNode*)NULL;
+		Point< Real , Dim > center;
+		for( int d=0 ; d<Dim ; d++ ) center[d] = (Real)0.5;
+		Real width = Real(1.0);
+		FEMTreeNode* node = &root;
+		int d=0;
+		while( d<maxDepth )
+		{
+#pragma omp critical
+			if( !node->children ) node->initChildren( nodeAllocator , NodeInitializer );
+			int cIndex = FEMTreeNode::ChildIndex( center , p );
+			node = node->children + cIndex;
+			d++;
+			width /= 2;
+			for( int d=0 ; d<Dim ; d++ )
+				if( (cIndex>>d) & 1 ) center[d] += width/2;
+				else                  center[d] -= width/2;
+		}
+		return node;
+	};
+
+	int sCount = 0;
+	for( int i=0 ; i<subSimplices.size() ; i++ )
+	{
+		// Find the finest depth at which the simplex is entirely within a node
+		int tDepth;
+		RegularGridIndex idx0 , idx;
+		for( tDepth=0 ; tDepth<maxDepth ; tDepth++ )
+		{
+			// Get the grid index of the first vertex of the simplex
+			for( int d=0 ; d<Dim ; d++ ) idx0.idx[d] = idx.idx[d] = (int)( subSimplices[i][0][d] * (1<<(tDepth+1)) );
+			bool done = false;
+			for( int k=0 ; k<Dim && !done ; k++ )
+			{
+				for( int d=0 ; d<Dim ; d++ ) idx.idx[d] = (int)( subSimplices[i][k][d] * (1<<(tDepth+1)) );
+				if( idx!=idx0 ) done = true;
+			}
+			if( done ) break;
+		}
+
+		// Insert this piece only, starting at the depth-tDepth node containing its center (tDepth was computed for this piece alone)
+		sCount += _AddSimplex( Leaf( subSimplices[i].center() , tDepth ) , subSimplices[i] , maxDepth , samples , nodeToIndexMap , nodeAllocator , NodeInitializer );
+	}
+	return sCount;
+}
+// Adds simplex s below `node`: at depth maxDepth it records a sample for the node (simplex center weighted by its measure); otherwise it
+// splits s by the planes through the node's center and recurses into the children. Returns the number of depth-maxDepth pieces reached.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::_AddSimplex( FEMTreeNode* node , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< PointSample >& samples , std::vector< int >* nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	int d = node->depth();
+	if( d==maxDepth )
+	{
+		// Only the center and the measure are needed for the sample
+		Real weight = s.measure();
+		Point< Real , Dim > position = s.center();
+		// Skip simplices whose measure is zero or NaN (weight==weight is false only for NaN)
+		if( weight && weight==weight )
+		{
+			if( nodeToIndexMap )
+			{
+				int nodeIndex = node->nodeData.nodeIndex;
+				// samples and *nodeToIndexMap are shared by all callers, so updates are serialized
+#pragma omp critical
+				{
+					if( nodeIndex>=nodeToIndexMap->size() ) nodeToIndexMap->resize( nodeIndex+1 , -1 );
+					int idx = (*nodeToIndexMap)[ nodeIndex ];
+					if( idx==-1 )
+					{
+						idx = (int)samples.size();
+						(*nodeToIndexMap)[ nodeIndex ] = idx;
+						samples.resize( idx+1 );
+						samples[idx].node = node;
+					}
+					samples[idx].sample += ProjectiveData< Point< Real , Dim > , Real >( position*weight , weight );
+				}
+			}
+			else
+			{
+#pragma omp critical
+				{
+					int idx = (int)samples.size();
+					samples.resize( idx+1 );
+					samples[idx].node = node;
+					samples[idx].sample = ProjectiveData< Point< Real , Dim > , Real >( position*weight , weight );
+				}
+			}
+		}
+		return 1;
+	}
+	else
+	{
+		int sCount = 0;
+#pragma omp critical
+		if( !node->children ) node->initChildren( nodeAllocator , NodeInitializer );
+
+		// Split up the simplex and pass the parts on to the children
+		Point< Real , Dim > center;
+		Real width;
+		node->centerAndWidth( center , width );
+
+		std::vector< std::vector< Simplex< Real , Dim , Dim-1 > > > childSimplices( 1 );
+		childSimplices[0].push_back( s );
+		for( int d=0 ; d<Dim ; d++ )	// split along the last axis first, doubling the number of buckets each pass
+		{
+			Point< Real , Dim > n ; n[Dim-d-1] = 1;
+			std::vector< std::vector< Simplex< Real , Dim , Dim-1 > > > temp( (int)( 1<<(d+1) ) );
+			for( int c=0 ; c<(1<<d) ; c++ ) for( int i=0 ; i<childSimplices[c].size() ; i++ ) childSimplices[c][i].split( n , center[Dim-d-1] , temp[2*c] , temp[2*c+1] );
+			childSimplices = temp;
+		}
+		for( int c=0 ; c<(1<<Dim) ; c++ ) for( int i=0 ; i<childSimplices[c].size() ; i++ ) if( childSimplices[c][i].measure() ) sCount += _AddSimplex( node->children+c , childSimplices[c][i] , maxDepth , samples , nodeToIndexMap , nodeAllocator , NodeInitializer );
+		return sCount;
+	}
+}
+
+// Inserts every simplex of the mesh into the tree (sequentially), collecting the pieces in nodeSimplices.
+template< unsigned int Dim , class Real >
+void FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , const std::vector< Point< Real , Dim > >& vertices , const std::vector< SimplexIndex< Dim-1 > >& simplices , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& nodeSimplices , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	std::vector< int > nodeToIndexMap;
+	for( int sIdx=0 ; sIdx<simplices.size() ; sIdx++ )
+	{
+		Simplex< Real , Dim , Dim-1 > simplex ; for( int v=0 ; v<Dim ; v++ ) simplex[v] = vertices[ simplices[sIdx][v] ];
+		_AddSimplex( root , simplex , maxDepth , nodeSimplices , nodeToIndexMap , nodeAllocator , NodeInitializer );
+	}
+	FEMTree< Dim , Real >::MemoryUsage();
+}
+
+// Clips s to the unit cube, then hands each clipped piece to the node-based overload, starting at the finest node (depth <= maxDepth) whose grid cell holds all of the piece's vertices. Returns the summed count of those calls.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::_AddSimplex( FEMTreeNode& root , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& simplices , std::vector< int >& nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	std::vector< Simplex< Real , Dim , Dim-1 > > subSimplices;
+	subSimplices.push_back( s );
+
+	// Clip the simplex to the unit cube
+	{
+		for( int d=0 ; d<Dim ; d++ )
+		{
+			Point< Real , Dim > n;
+			n[d] = 1;
+			{
+				std::vector< Simplex< Real , Dim , Dim-1 > > back , front;
+				for( int i=0 ; i<subSimplices.size() ; i++ ) subSimplices[i].split( n , 0 , back , front );
+				subSimplices = front;	// keep the pieces on the front side of the plane x_d = 0
+			}
+			{
+				std::vector< Simplex< Real , Dim , Dim-1 > > back , front;
+				for( int i=0 ; i<subSimplices.size() ; i++ ) subSimplices[i].split( n , 1 , back , front );
+				subSimplices = back;	// keep the pieces on the back side of the plane x_d = 1
+			}
+		}
+	}
+
+	struct RegularGridIndex
+	{
+		int idx[Dim];
+		bool operator != ( const RegularGridIndex& i ) const
+		{
+			for( int d=0 ; d<Dim ; d++ ) if( idx[d]!=i.idx[d] ) return true;
+			return false;
+		}
+	};
+
+	auto Leaf = [&]( Point< Real , Dim > p , int maxDepth )	// node at depth maxDepth containing p (children created as needed), or NULL if p is outside the unit cube
+	{
+		for( int d=0 ; d<Dim ; d++ ) if( p[d]<0 || p[d]>1 ) return (FEMTreeNode*)NULL;
+		Point< Real , Dim > center;
+		for( int d=0 ; d<Dim ; d++ ) center[d] = (Real)0.5;
+		Real width = Real(1.0);
+		FEMTreeNode* node = &root;
+		int d=0;
+		while( d<maxDepth )
+		{
+			if( !node->children ) node->initChildren( nodeAllocator , NodeInitializer );
+			int cIndex = FEMTreeNode::ChildIndex( center , p );
+			node = node->children + cIndex;
+			d++;
+			width /= 2;
+			for( int d=0 ; d<Dim ; d++ )
+				if( (cIndex>>d) & 1 ) center[d] += width/2;
+				else                  center[d] -= width/2;
+		}
+		return node;
+	};
+
+	int sCount = 0;
+	for( int i=0 ; i<subSimplices.size() ; i++ )
+	{
+		// Find the finest depth at which the simplex is entirely within a node
+		int tDepth;
+		RegularGridIndex idx0 , idx;
+		for( tDepth=0 ; tDepth<maxDepth ; tDepth++ )
+		{
+			// Get the grid index of the first vertex of the simplex
+			for( int d=0 ; d<Dim ; d++ ) idx0.idx[d] = (int)( subSimplices[i][0][d] * (1<<(tDepth+1)) );
+			bool done = false;
+			for( int k=0 ; k<Dim && !done ; k++ )
+			{
+				for( int d=0 ; d<Dim ; d++ ) idx.idx[d] = (int)( subSimplices[i][k][d] * (1<<(tDepth+1)) );
+				if( idx!=idx0 ) done = true;
+			}
+			if( done ) break;
+		}
+
+		// Add this piece only, starting at the depth-tDepth node containing its center (both were computed for this piece alone)
+		FEMTreeNode* subSimplexNode = Leaf( subSimplices[i].center() , tDepth );
+		sCount += _AddSimplex( subSimplexNode , subSimplices[i] , maxDepth , simplices , nodeToIndexMap , nodeAllocator , NodeInitializer );
+	}
+	return sCount;
+}
+// Recursively distributes the simplex s over the descendants of node at depth maxDepth.
+// At depth maxDepth the simplex is appended to the list associated with the node (provided its measure is
+// neither zero nor NaN) and 1 is returned. Above that depth the simplex is split by the axis-aligned planes
+// through the node's center and each part is passed to the matching child; the sum of the children's return
+// values is returned.
+template< unsigned int Dim , class Real >
+int FEMTreeInitializer< Dim , Real >::_AddSimplex( FEMTreeNode* node , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& simplices , std::vector< int >& nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	typedef std::vector< Simplex< Real , Dim , Dim-1 > > SimplexList;
+
+	const int depth = node->depth();
+	if( depth==maxDepth )
+	{
+		// Only record simplices whose measure is non-zero and not NaN (NaN is the only value differing from itself)
+		const Real measure = s.measure();
+		if( measure && measure==measure )
+		{
+			const int nodeIndex = node->nodeData.nodeIndex;
+			// Grow the map on demand, marking the new entries as unassigned
+			if( nodeIndex>=nodeToIndexMap.size() ) nodeToIndexMap.resize( nodeIndex+1 , -1 );
+			int slot = nodeToIndexMap[ nodeIndex ];
+			if( slot==-1 )
+			{
+				// First simplex for this node: create its entry
+				slot = (int)simplices.size();
+				nodeToIndexMap[ nodeIndex ] = slot;
+				simplices.resize( slot+1 );
+				simplices[slot].node = node;
+			}
+			simplices[slot].data.push_back( s );
+		}
+		return 1;
+	}
+
+	if( !node->children ) node->initChildren( nodeAllocator , NodeInitializer );
+
+	Point< Real , Dim > center;
+	Real width;
+	node->centerAndWidth( center , width );
+
+	// Split along the last axis first, doubling the number of buckets with every pass,
+	// so that after the final pass bit a of the bucket index is the side of the center along axis a
+	std::vector< SimplexList > buckets( 1 );
+	buckets[0].push_back( s );
+	for( int pass=0 ; pass<Dim ; pass++ )
+	{
+		const unsigned int axis = Dim-pass-1;
+		Point< Real , Dim > normal;
+		normal[axis] = 1;
+		std::vector< SimplexList > refined( (int)( 1<<(pass+1) ) );
+		for( int c=0 ; c<(1<<pass) ; c++ )
+		{
+			SimplexList& bucket = buckets[c];
+			for( int i=0 ; i<bucket.size() ; i++ ) bucket[i].split( normal , center[axis] , refined[2*c] , refined[2*c+1] );
+		}
+		buckets.swap( refined );
+	}
+
+	// Pass the parts on to the children
+	int total = 0;
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+	{
+		SimplexList& bucket = buckets[c];
+		for( int i=0 ; i<bucket.size() ; i++ ) total += _AddSimplex( node->children+c , bucket[i] , maxDepth , simplices , nodeToIndexMap , nodeAllocator , NodeInitializer );
+	}
+	return total;
+}
+
+// Converts a regular grid of labeled values into per-axis difference samples.
+// For every pair of cells that are adjacent along axis d and carry different, non-negative labels,
+// a sample holding DataConverter( next value ) - DataConverter( current value ) is appended to derivatives[d].
+//   Dual:   the sample is attached to the node of the next cell
+//   Primal: the sample is attached to the node of the current cell
+// Returns the smallest depth for which (1<<depth) + ( Dual ? 0 : 1 ) is at least the largest resolution.
+template< unsigned int Dim , class Real >
+template< class Data , class _Data , bool Dual >
+int FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , ConstPointer( Data ) values , ConstPointer( int ) labels , int resolution[Dim] , std::vector< NodeSample< Dim , _Data > > derivatives[Dim] , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer , std::function< _Data ( const Data& ) > DataConverter )
+{
+	// Walks from the root to the node at the prescribed depth with the prescribed cell offset, creating children as needed.
+	// Returns NULL if the offset is outside of the grid at that depth.
+	auto Leaf = [&]( FEMTreeNode& treeRoot , const int cell[Dim] , int depth )
+	{
+		for( int d=0 ; d<Dim ; d++ ) if( cell[d]<0 || cell[d]>=(1<<depth) ) return (FEMTreeNode*)NULL;
+		FEMTreeNode* cur = &treeRoot;
+		for( int level=0 ; level<depth ; level++ )
+		{
+			if( !cur->children ) cur->initChildren( nodeAllocator , NodeInitializer );
+			// The child is selected by the bit of each coordinate associated to this level (most significant first)
+			const int bit = 1<<(depth-level-1);
+			int child = 0;
+			for( int a=0 ; a<Dim ; a++ ) if( cell[a] & bit ) child |= 1<<a;
+			cur = cur->children + child;
+		}
+		return cur;
+	};
+	// Splits a linear index into per-axis coordinates (axis 0 varies fastest)
+	auto FactorIndex = []( size_t i , const int res[Dim] , int cell[Dim] )
+	{
+		size_t rest = i;
+		for( int d=0 ; d<Dim ; d++ )
+		{
+			cell[d] = rest % res[d];
+			rest /= res[d];
+		}
+	};
+	// Combines per-axis coordinates into a linear index (the inverse of FactorIndex)
+	auto MakeIndex = [] ( const int cell[Dim] , const int res[Dim] )
+	{
+		size_t linear = 0;
+		for( int d=Dim-1 ; d>=0 ; d-- ) linear = linear * res[d] + cell[d];
+		return linear;
+	};
+
+	// The depth is determined by the largest resolution
+	int maxResolution = resolution[0];
+	for( int d=1 ; d<Dim ; d++ ) maxResolution = std::max< int >( maxResolution , resolution[d] );
+	const int slack = Dual ? 0 : 1;
+	int maxDepth = 0;
+	while( ( (1<<maxDepth) + slack )<maxResolution ) maxDepth++;
+
+	size_t totalRes = 1;
+	for( int d=0 ; d<Dim ; d++ ) totalRes *= resolution[d];
+
+	// Iterate over each direction and each cell
+	for( int d=0 ; d<Dim ; d++ ) for( size_t i=0 ; i<totalRes ; i++ )
+	{
+		// Get the coordinates of the neighbor following cell i along axis d
+		int cell[Dim];
+		FactorIndex( i , resolution , cell );
+		cell[d]++;
+		if( cell[d]>=resolution[d] ) continue;
+
+		const size_t next = MakeIndex( cell , resolution );
+
+		// [NOTE] There are no derivatives across negative labels
+		if( labels[i]==labels[next] || labels[i]<0 || labels[next]<0 ) continue;
+
+		// In the primal case the sample lives in the current cell, not in the next one
+		if( !Dual ) cell[d]--;
+		NodeSample< Dim , _Data > sample;
+		sample.node = Leaf( root , cell , maxDepth );
+		sample.data = DataConverter( values[next] ) - DataConverter( values[i] );
+		if( sample.node ) derivatives[d].push_back( sample );
+	}
+	return maxDepth;
+}
+
+// Reads derivative samples from a stream and attaches each of them to the tree node of its cell.
+// Every sample read from dStream is appended to derivatives[dir], provided its cell lies within the grid.
+// Returns the smallest depth for which (1<<depth) + ( Dual ? 0 : 1 ) is at least the largest stream resolution.
+template< unsigned int Dim , class Real >
+template< bool Dual , class Data >
+unsigned int FEMTreeInitializer< Dim , Real >::Initialize( FEMTreeNode& root , DerivativeStream< Data >& dStream , std::vector< NodeSample< Dim , Data > > derivatives[Dim] , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer )
+{
+	// Note:
+	// --   Dual: The difference between [i] and [i+1] is stored at cell [i+1]
+	// -- Primal: The difference between [i] and [i+1] is stored at cell [i]
+
+	// Find the leaf containing the specified cell index (creating children as needed).
+	// Returns NULL if the cell is outside of the grid at the prescribed depth.
+	auto Leaf = [&]( FEMTreeNode& treeRoot , const unsigned int cell[Dim] , unsigned int depth )
+	{
+		// [NOTE] The coordinates are unsigned, so only the upper bound has to be tested
+		for( int d=0 ; d<Dim ; d++ ) if( cell[d]>=(unsigned int)(1<<depth) ) return (FEMTreeNode*)NULL;
+		FEMTreeNode* cur = &treeRoot;
+		for( unsigned int level=0 ; level<depth ; level++ )
+		{
+			if( !cur->children ) cur->initChildren( nodeAllocator , NodeInitializer );
+			int child = 0;
+			for( int a=0 ; a<Dim ; a++ ) if( cell[a]&(1<<(depth-level-1)) ) child |= 1<<a;
+			cur = cur->children + child;
+		}
+		return cur;
+	};
+
+	unsigned int resolution[Dim];
+	dStream.resolution( resolution );
+	unsigned int maxResolution = resolution[0];
+	for( int d=1 ; d<Dim ; d++ ) maxResolution = std::max< unsigned int >( maxResolution , resolution[d] );
+
+	// If we are using a dual formulation, we need at least maxResolution cells.
+	// Otherwise, we need at least maxResolution-1 cells.
+	unsigned int maxDepth = 0;
+	for( ; (unsigned int)( (1<<maxDepth) + ( Dual ? 0 : 1 ) )<maxResolution ; maxDepth++ ){ }
+
+	unsigned int cell[Dim] , dir;
+	Data value;
+	while( dStream.nextDerivative( cell , dir , value ) )
+	{
+		// In the dual case the sample is stored with the next cell along its direction
+		if( Dual ) cell[dir]++;
+		NodeSample< Dim , Data > sample;
+		sample.node = Leaf( root , cell , maxDepth );
+		sample.data = value;
+		if( sample.node ) derivatives[dir].push_back( sample );
+	}
+	return maxDepth;
+}
diff --git a/Src/FEMTree.IsoSurface.specialized.inl b/Src/FEMTree.IsoSurface.specialized.inl
new file mode 100644
index 0000000..4bc2f36
--- /dev/null
+++ b/Src/FEMTree.IsoSurface.specialized.inl
@@ -0,0 +1,1804 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#include <sstream>
+#include <iomanip>
+#include <unordered_map>
+#include "MyMiscellany.h"
+#include "MarchingCubes.h"
+#include "MAT.h"
+
+
+// Specialized iso-surface extraction
+template< class Real , class Vertex >
+struct IsoSurfaceExtractor< 3 , Real , Vertex >
+{
+	// This specialization is fixed to three dimensions
+	static const unsigned int Dim = 3;
+	// Short-hands for the types nested in the three-dimensional FEMTree
+	typedef typename FEMTree< Dim , Real >::LocalDepth LocalDepth;
+	typedef typename FEMTree< Dim , Real >::LocalOffset LocalOffset;
+	typedef typename FEMTree< Dim , Real >::ConstOneRingNeighborKey ConstOneRingNeighborKey;
+	typedef typename FEMTree< Dim , Real >::ConstOneRingNeighbors ConstOneRingNeighbors;
+	// The type of the tree nodes
+	typedef RegularTreeNode< Dim , FEMTreeNodeData > TreeNode;
+	// Alias templates forwarding to the corresponding FEMTree member templates
+	template< unsigned int WeightDegree > using DensityEstimator = typename FEMTree< Dim , Real >::template DensityEstimator< WeightDegree >;
+	template< typename FEMSigPack , unsigned int PointD > using _Evaluator = typename FEMTree< Dim , Real >::template _Evaluator< FEMSigPack , PointD >;
+protected:
+	//////////////
+	// _IsoEdge //
+	//////////////
+	// A pair of 64-bit values identifying the two end-points of an edge (presumably vertex keys -- confirm against the callers).
+	struct _IsoEdge
+	{
+		long long edges[2];
+
+		// Both end-points default to zero
+		_IsoEdge( void )
+		{
+			edges[0] = 0;
+			edges[1] = 0;
+		}
+		_IsoEdge( long long v1 , long long v2 )
+		{
+			edges[0] = v1;
+			edges[1] = v2;
+		}
+
+		// Unchecked access to the end-point with the given index
+		long long& operator[]( int idx ){ return edges[idx]; }
+		const long long& operator[]( int idx ) const { return edges[idx]; }
+	};
+
+	////////////////
+	// _FaceEdges //
+	////////////////
+	// Storage for up to two edges together with a counter (count is not initialized here)
+	struct _FaceEdges
+	{
+		_IsoEdge edges[2];
+		int count;
+	};
+
+	///////////////
+	// SliceData //
+	///////////////
+	class SliceData
+	{
+		typedef RegularTreeNode< Dim , FEMTreeNodeData > TreeOctNode;
+	public:
+		// A fixed-size array of integer indices, all of which start out as -1
+		template< unsigned int Indices >
+		struct  _Indices
+		{
+			int idx[Indices];
+
+			_Indices( void )
+			{
+				for( unsigned int i=0 ; i<Indices ; i++ ) idx[i] = -1;
+			}
+
+			// Unchecked access to the i-th index
+			int& operator[] ( int i ) { return idx[i]; }
+			const int& operator[] ( int i ) const { return idx[i]; }
+		};
+		// Index arrays sized by the number of 0-, 1- and 2-dimensional elements of a (Dim-1)-dimensional cube
+		// (presumably the corners, edges and the single face of a square -- confirm against HyperCube::Cube)
+		typedef _Indices< HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() > SquareCornerIndices;
+		typedef _Indices< HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() > SquareEdgeIndices;
+		typedef _Indices< HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() > SquareFaceIndices;
+
+		// The per-node corner / edge / face index tables of a slice.
+		// The tables are indexed by node index relative to nodeOffset, the index of the first node of the slice.
+		// The protected members are scratch buffers managed by SliceData (see SetSliceTableData).
+		struct SliceTableData
+		{
+			Pointer( SquareCornerIndices ) cTable;
+			Pointer( SquareEdgeIndices   ) eTable;
+			Pointer( SquareFaceIndices   ) fTable;
+			int cCount , eCount , fCount , nodeOffset , nodeCount;
+
+			// Starts out empty: no tables, no scratch buffers, all counters at zero
+			SliceTableData( void )
+			{
+				fCount = eCount = cCount = 0;
+				// [NOTE] The node span is zeroed as well so that it is never read uninitialized
+				nodeOffset = nodeCount = 0;
+				_oldNodeCount = 0;
+				cTable = NullPointer( SquareCornerIndices ) , eTable = NullPointer( SquareEdgeIndices ) , fTable = NullPointer( SquareFaceIndices );
+				_cMap = _eMap = _fMap = NullPointer( int );
+				_processed = NullPointer( char );
+			}
+			// Releases all tables and scratch buffers and returns the object to its freshly-constructed state
+			void clear( void )
+			{
+				DeletePointer( cTable ) ; DeletePointer( eTable ) ; DeletePointer( fTable );
+				DeletePointer( _cMap ) ; DeletePointer( _eMap ) ; DeletePointer( _fMap );
+				DeletePointer( _processed );
+				cTable = NullPointer( SquareCornerIndices ) , eTable = NullPointer( SquareEdgeIndices ) , fTable = NullPointer( SquareFaceIndices );
+				_cMap = _eMap = _fMap = NullPointer( int );
+				_processed = NullPointer( char );
+				fCount = eCount = cCount = 0;
+				// [NOTE] The allocated capacity has to be forgotten as well: SetSliceTableData only re-allocates when
+				//        nodeCount exceeds _oldNodeCount, and would otherwise write into the released buffers.
+				_oldNodeCount = 0;
+			}
+			~SliceTableData( void ){ clear(); }
+
+			// Access to the corner indices of a node, given either the node or its index
+			SquareCornerIndices& cornerIndices( const TreeOctNode* node ) { return cTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			SquareCornerIndices& cornerIndices( int idx ) { return cTable[ idx - nodeOffset ]; }
+			const SquareCornerIndices& cornerIndices( const TreeOctNode* node ) const { return cTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			const SquareCornerIndices& cornerIndices( int idx ) const { return cTable[ idx - nodeOffset ]; }
+			// Access to the edge indices of a node, given either the node or its index
+			SquareEdgeIndices& edgeIndices( const TreeOctNode* node ) { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			SquareEdgeIndices& edgeIndices( int idx ) { return eTable[ idx - nodeOffset ]; }
+			const SquareEdgeIndices& edgeIndices( const TreeOctNode* node ) const { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			const SquareEdgeIndices& edgeIndices( int idx ) const { return eTable[ idx - nodeOffset ]; }
+			// Access to the face indices of a node, given either the node or its index
+			SquareFaceIndices& faceIndices( const TreeOctNode* node ) { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			SquareFaceIndices& faceIndices( int idx ) { return fTable[ idx - nodeOffset ]; }
+			const SquareFaceIndices& faceIndices( const TreeOctNode* node ) const { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			const SquareFaceIndices& faceIndices( int idx ) const { return fTable[ idx - nodeOffset ]; }
+
+		protected:
+			// Per-element ownership flags for the corners, edges and faces
+			Pointer( int ) _cMap;
+			Pointer( int ) _eMap;
+			Pointer( int ) _fMap;
+			// Per-node flag marking the nodes that have already been handled
+			Pointer( char ) _processed;
+			// The number of nodes the buffers are currently allocated for
+			int _oldNodeCount;
+			friend SliceData;
+		};
+		// The per-node cross-slice edge / face index tables.
+		// The tables are indexed by node index relative to nodeOffset, the index of the first node of the slice.
+		// The protected members are scratch buffers managed by SliceData (see SetSliceTableData).
+		struct XSliceTableData
+		{
+			Pointer( SquareCornerIndices ) eTable;
+			Pointer( SquareEdgeIndices ) fTable;
+			int fCount , eCount , nodeOffset , nodeCount;
+
+			// Starts out empty: no tables, no scratch buffers, all counters at zero
+			XSliceTableData( void )
+			{
+				fCount = eCount = 0;
+				// [NOTE] The node span is zeroed as well so that it is never read uninitialized
+				nodeOffset = nodeCount = 0;
+				_oldNodeCount = 0;
+				eTable = NullPointer( SquareCornerIndices ) , fTable = NullPointer( SquareEdgeIndices );
+				_eMap = _fMap = NullPointer( int );
+			}
+			~XSliceTableData( void ){ clear(); }
+			// Releases all tables and scratch buffers and returns the object to its freshly-constructed state
+			void clear( void )
+			{
+				DeletePointer( fTable ) ; DeletePointer( eTable );
+				DeletePointer( _eMap ) ; DeletePointer( _fMap );
+				eTable = NullPointer( SquareCornerIndices ) , fTable = NullPointer( SquareEdgeIndices );
+				_eMap = _fMap = NullPointer( int );
+				fCount = eCount = 0;
+				// [NOTE] The allocated capacity has to be forgotten as well: SetSliceTableData only re-allocates when
+				//        nodeCount exceeds _oldNodeCount, and would otherwise write into the released buffers.
+				_oldNodeCount = 0;
+			}
+
+			// Access to the cross-slice edge indices of a node, given either the node or its index
+			SquareCornerIndices& edgeIndices( const TreeOctNode* node ) { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			SquareCornerIndices& edgeIndices( int idx ) { return eTable[ idx - nodeOffset ]; }
+			const SquareCornerIndices& edgeIndices( const TreeOctNode* node ) const { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			const SquareCornerIndices& edgeIndices( int idx ) const { return eTable[ idx - nodeOffset ]; }
+			// Access to the cross-slice face indices of a node, given either the node or its index
+			SquareEdgeIndices& faceIndices( const TreeOctNode* node ) { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			SquareEdgeIndices& faceIndices( int idx ) { return fTable[ idx - nodeOffset ]; }
+			const SquareEdgeIndices& faceIndices( const TreeOctNode* node ) const { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
+			const SquareEdgeIndices& faceIndices( int idx ) const { return fTable[ idx - nodeOffset ]; }
+		protected:
+			// Per-element ownership flags for the cross-slice edges and faces
+			Pointer( int ) _eMap;
+			Pointer( int ) _fMap;
+			// The number of nodes the buffers are currently allocated for
+			int _oldNodeCount;
+			friend SliceData;
+		};
+		// Static look-up tables caching the results of HyperCube::Cube< D > queries.
+		// The primary template is empty; the tables are provided by the specializations below.
+		template< unsigned int D , unsigned int ... Ks > struct HyperCubeTables{};
+		// Tables indexed by a single K-dimensional element of the D-dimensional cube
+		template< unsigned int D , unsigned int K >
+		struct HyperCubeTables< D , K >
+		{
+			// [element][incident cube] -> HyperCube::Cube< D >::CellOffset( e , i )
+			static unsigned int CellOffset[ HyperCube::Cube< D >::template ElementNum< K >() ][ HyperCube::Cube< D >::template IncidentCubeNum< K >() ];
+			// [element][incident cube] -> HyperCube::Cube< D >::IncidentElement( e , i ).coIndex()
+			static unsigned int IncidentElementCoIndex[ HyperCube::Cube< D >::template ElementNum< K >() ][ HyperCube::Cube< D >::template IncidentCubeNum< K >() ];
+			// [element] -> the cell offset of the cube antipodal to the element's incident cube
+			static unsigned int CellOffsetAntipodal[ HyperCube::Cube< D >::template ElementNum< K >() ];
+			// [element] -> HyperCube::Cube< D >::IncidentCube( e )
+			static typename HyperCube::Cube< D >::template IncidentCubeIndex< K > IncidentCube[ HyperCube::Cube< D >::template ElementNum< K >() ];
+			// [element] -> the per-axis directions written by e.directions()
+			static typename HyperCube::Direction Directions[ HyperCube::Cube< D >::template ElementNum< K >() ][ D ];
+			// Fills in all of the tables above by iterating over every element and every incident cube
+			static void SetTables( void )
+			{
+				for( typename HyperCube::Cube< D >::template Element< K > e ; e<HyperCube::Cube< D >::template ElementNum< K >() ; e++ )
+				{
+					for( typename HyperCube::Cube< D >::template IncidentCubeIndex< K > i ; i<HyperCube::Cube< D >::template IncidentCubeNum< K >() ; i++ )
+					{
+						CellOffset[e.index][i.index] = HyperCube::Cube< D >::CellOffset( e , i );
+						IncidentElementCoIndex[e.index][i.index] = HyperCube::Cube< D >::IncidentElement( e , i ).coIndex();
+					}
+					CellOffsetAntipodal[e.index] = HyperCube::Cube< D >::CellOffset( e , HyperCube::Cube< D >::IncidentCube( e ).antipodal() );
+					IncidentCube[ e.index ] = HyperCube::Cube< D >::IncidentCube( e );
+					e.directions( Directions[e.index] );
+				}
+			}
+		};
+		// Tables relating the K1-dimensional elements of the D-dimensional cube to its K2-dimensional elements
+		template< unsigned int D , unsigned int K1 , unsigned int K2 >
+		struct HyperCubeTables< D , K1 , K2 >
+		{
+			// [K1-element] -> the list written by HyperCube::Cube< D >::OverlapElements( e , ... )
+			static typename HyperCube::Cube< D >::template Element< K2 > OverlapElements[ HyperCube::Cube< D >::template ElementNum< K1 >() ][ HyperCube::Cube< D >::template OverlapElementNum< K1 , K2 >() ];
+			// [K1-element][K2-element] -> HyperCube::Cube< D >::Overlap( e , _e )
+			static bool Overlap[ HyperCube::Cube< D >::template ElementNum< K1 >() ][ HyperCube::Cube< D >::template ElementNum< K2 >() ];
+			// Fills in the two tables above by iterating over every pair of elements
+			static void SetTables( void )
+			{
+				for( typename HyperCube::Cube< D >::template Element< K1 > e ; e<HyperCube::Cube< D >::template ElementNum< K1 >() ; e++ )
+				{
+					for( typename HyperCube::Cube< D >::template Element< K2 > _e ; _e<HyperCube::Cube< D >::template ElementNum< K2 >() ; _e++ )
+						Overlap[e.index][_e.index] = HyperCube::Cube< D >::Overlap( e , _e );
+					HyperCube::Cube< D >::OverlapElements( e , OverlapElements[e.index] );
+				}
+				// The single-element tables for K1 are set exactly once per K1, namely when K2 is zero
+				if( !K2 ) HyperCubeTables< D , K1 >::SetTables();
+			}
+		};
+
+		// Sets the HyperCubeTables for every dimension in [1,D] and every pair (K1,K2) of element dimensions.
+		// The four overloads below are selected through std::enable_if and together implement a compile-time
+		// triple loop: K2 counts down to 0, then K1 is decremented and K2 restarts at D, and once both have
+		// reached 0 the dimension D itself is decremented until it reaches 1.
+
+		// K2!=0: set the tables for (D,K1,K2) and continue with K2-1
+		template< unsigned int D=Dim , unsigned int K1=Dim , unsigned int K2=Dim > static typename std::enable_if<                 K2!=0 >::type SetHyperCubeTables( void )
+		{
+			HyperCubeTables< D , K1 , K2 >::SetTables() ; SetHyperCubeTables< D , K1 , K2-1 >();
+		}
+		// K1!=0 and K2==0: set the tables for (D,K1,0) and continue with K1-1, restarting K2 at D
+		template< unsigned int D=Dim , unsigned int K1=Dim , unsigned int K2=Dim > static typename std::enable_if<        K1!=0 && K2==0 >::type SetHyperCubeTables( void )
+		{
+			HyperCubeTables< D , K1 , K2 >::SetTables(); SetHyperCubeTables< D , K1-1 , D >();
+		}
+		// D!=1 and K1==K2==0: set the tables for (D,0,0) and continue with the next lower dimension
+		template< unsigned int D=Dim , unsigned int K1=Dim , unsigned int K2=Dim > static typename std::enable_if< D!=1 && K1==0 && K2==0 >::type SetHyperCubeTables( void )
+		{
+			HyperCubeTables< D , K1 , K2 >::SetTables() ; SetHyperCubeTables< D-1 , D-1 , D-1 >();
+		}
+		// D==1 and K1==K2==0: set the last tables and terminate the recursion
+		template< unsigned int D=Dim , unsigned int K1=Dim , unsigned int K2=Dim > static typename std::enable_if< D==1 && K1==0 && K2==0 >::type SetHyperCubeTables( void )
+		{
+			HyperCubeTables< D , K1 , K2 >::SetTables();
+		}
+
+		static void SetSliceTableData( const SortedTreeNodes< Dim >& sNodes , SliceTableData* sData0 , XSliceTableData* xData , SliceTableData* sData1 , int depth , int offset )
+		{
+			// [NOTE] This is structure is purely for determining adjacency and is independent of the FEM degree
+			typedef typename FEMTree< Dim , Real >::ConstOneRingNeighborKey ConstOneRingNeighborKey;
+			if( offset<0 || offset>((size_t)1<<depth) ) return;
+			if( sData0 )
+			{
+				std::pair< int , int > span( sNodes.begin( depth , offset-1 ) , sNodes.end( depth , offset ) );
+				sData0->nodeOffset = span.first , sData0->nodeCount = span.second - span.first;
+			}
+			if( sData1 )
+			{
+				std::pair< int , int > span( sNodes.begin( depth , offset ) , sNodes.end( depth , offset+1 ) );
+				sData1->nodeOffset = span.first , sData1->nodeCount = span.second - span.first;
+			}
+			if( xData )
+			{
+				std::pair< int , int > span( sNodes.begin( depth , offset ) , sNodes.end( depth , offset ) );
+				xData->nodeOffset = span.first , xData->nodeCount = span.second - span.first;
+			}
+			SliceTableData* sData[] = { sData0 , sData1 };
+			for( int i=0 ; i<2 ; i++ ) if( sData[i] )
+			{
+				if( sData[i]->nodeCount>sData[i]->_oldNodeCount )
+				{
+					DeletePointer( sData[i]->_cMap ) ; DeletePointer( sData[i]->_eMap ) ; DeletePointer( sData[i]->_fMap );
+					DeletePointer( sData[i]->cTable ) ; DeletePointer( sData[i]->eTable ) ; DeletePointer( sData[i]->fTable );
+					DeletePointer( sData[i]->_processed );
+					sData[i]->_cMap = NewPointer< int >( sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() );
+					sData[i]->_eMap = NewPointer< int >( sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() );
+					sData[i]->_fMap = NewPointer< int >( sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() );
+					sData[i]->_processed = NewPointer< char >( sData[i]->nodeCount );
+					sData[i]->cTable = NewPointer< typename SliceData::SquareCornerIndices >( sData[i]->nodeCount );
+					sData[i]->eTable = NewPointer< typename SliceData::SquareEdgeIndices >( sData[i]->nodeCount );
+					sData[i]->fTable = NewPointer< typename SliceData::SquareFaceIndices >( sData[i]->nodeCount );
+					sData[i]->_oldNodeCount = sData[i]->nodeCount;
+				}
+				memset( sData[i]->_cMap , 0 , sizeof(int) * sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() );
+				memset( sData[i]->_eMap , 0 , sizeof(int) * sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() );
+				memset( sData[i]->_fMap , 0 , sizeof(int) * sData[i]->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() );
+				memset( sData[i]->_processed , 0 , sizeof(char) * sData[i]->nodeCount );
+			}
+			if( xData )
+			{
+				if( xData->nodeCount>xData->_oldNodeCount )
+				{
+					DeletePointer( xData->_eMap ) ; DeletePointer( xData->_fMap );
+					DeletePointer( xData->eTable ) ; DeletePointer( xData->fTable );
+					xData->_eMap = NewPointer< int >( xData->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() );
+					xData->_fMap = NewPointer< int >( xData->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() );
+					xData->eTable = NewPointer< typename SliceData::SquareCornerIndices >( xData->nodeCount );
+					xData->fTable = NewPointer< typename SliceData::SquareEdgeIndices >( xData->nodeCount );
+					xData->_oldNodeCount = xData->nodeCount;
+				}
+				memset( xData->_eMap , 0 , sizeof(int) * xData->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() );
+				memset( xData->_fMap , 0 , sizeof(int) * xData->nodeCount * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() );
+			}
+			std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( depth );
+
+			typedef typename FEMTree< Dim , Real >::ConstOneRingNeighbors ConstNeighbors;
+
+			// Process the corners
+			// z: which side of the cell	\in {0,1}
+			// zOff: which neighbor			\in {-1,0,1}
+			auto ProcessCorners = []( SliceTableData& sData , const ConstNeighbors& neighbors , HyperCube::Direction zDir , int zOff )
+			{
+				const TreeOctNode* node = neighbors.neighbors[1][1][1+zOff];
+				int i = node->nodeData.nodeIndex;
+				// Iterate over the corners in the face
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 0 > _c ; _c<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; _c++ )
+				{
+					bool owner = true;
+
+					typename HyperCube::Cube< Dim >::template Element< 0 > c( zDir , _c.index );																	// Corner-in-cube index
+					typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 0 > my_ic = HyperCubeTables< Dim , 0 >::IncidentCube[c.index];						// The index of the node relative to the corner
+					for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 0 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 0 >() ; ic++ )	// Iterate over the nodes adjacent to the corner
+					{
+						// Get the index of cube relative to the corner neighbors
+						unsigned int xx = HyperCubeTables< Dim , 0 >::CellOffset[c.index][ic.index] + zOff;
+						// If the neighbor exists and comes before, they own the corner
+						if( neighbors.neighbors.data[xx] && ic<my_ic ){ owner = false ; break; }
+					}
+					if( owner )
+					{
+						int myCount = (i - sData.nodeOffset) * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() + _c.index;
+						sData._cMap[ myCount ] = 1;
+						// Set the corner pointer for all cubes incident on the corner
+						for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 0 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 0 >() ; ic++ )	// Iterate over the nodes adjacent to the corner
+						{
+							unsigned int xx = HyperCubeTables< Dim , 0 >::CellOffset[c.index][ic.index] + zOff;
+							// If the neighbor exits, sets its corner
+							if( neighbors.neighbors.data[xx] ) sData.cornerIndices( neighbors.neighbors.data[xx] )[ HyperCubeTables< Dim , 0 >::IncidentElementCoIndex[c.index][ic.index] ] = myCount;
+						}
+					}
+				}
+			};
+			// Process the in-plane edges
+			auto ProcessIEdges = []( SliceTableData& sData , const ConstNeighbors& neighbors , HyperCube::Direction zDir , int zOff )
+			{
+				const TreeOctNode* node = neighbors.neighbors[1][1][1+zOff];
+				int i = node->nodeData.nodeIndex;
+				// Iterate over the edges in the face
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 1 > _e ; _e<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; _e++ )
+				{
+					bool owner = true;
+
+					// The edge in the cube
+					typename HyperCube::Cube< Dim >::template Element< 1 > e( zDir , _e.index );
+					// The index of the cube relative to the edge
+					typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > my_ic = HyperCubeTables< Dim , 1 >::IncidentCube[e.index];
+					// Iterate over the cubes incident on the edge
+					for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ )
+					{
+						// Get the indices of the cube relative to the center
+						unsigned int xx = HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index] + zOff;
+						// If the neighbor exists and comes before, they own the corner
+						if( neighbors.neighbors.data[xx] && ic<my_ic ){ owner = false ; break; }
+					}
+					if( owner )
+					{
+						int myCount = ( i - sData.nodeOffset ) * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() + _e.index;
+						sData._eMap[ myCount ] = 1;
+						// Set all edge indices
+						for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ )
+						{
+							unsigned int xx = HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index] + zOff;
+							// If the neighbor exists, set the index
+							if( neighbors.neighbors.data[xx] ) sData.edgeIndices( neighbors.neighbors.data[xx] )[ HyperCubeTables< Dim , 1 >::IncidentElementCoIndex[e.index][ic.index] ] = myCount;
+						}
+					}
+				}
+			};
+			// Process the cross-plane edges
+			auto ProcessXEdges = []( XSliceTableData& xData , const ConstNeighbors& neighbors )
+			{
+				const TreeOctNode* node = neighbors.neighbors[1][1][1];
+				int i = node->nodeData.nodeIndex;
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 0 > _c ; _c<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; _c++ )
+				{
+					bool owner = true;
+
+					typename HyperCube::Cube< Dim >::template Element< 1 > e( HyperCube::CROSS , _c.index );
+					typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > my_ic = HyperCubeTables< Dim , 1 >::IncidentCube[e.index];
+
+					for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ )
+					{
+						unsigned int xx = HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index];
+						if( neighbors.neighbors.data[xx] && ic<my_ic ){ owner = false ; break; }
+					}
+					if( owner )
+					{
+						int myCount = ( i - xData.nodeOffset ) * HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() + _c.index;
+						xData._eMap[ myCount ] = 1;
+
+						// Set all edge indices
+						for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ )
+						{
+							unsigned int xx = HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index];
+							if( neighbors.neighbors.data[xx] ) xData.edgeIndices( neighbors.neighbors.data[xx] )[ HyperCubeTables< Dim , 1 >::IncidentElementCoIndex[e.index][ic.index] ] = myCount;
+						}
+					}
+				}
+			};
+			// Process the in-plane faces
+			auto ProcessIFaces = []( SliceTableData& sData , const ConstNeighbors& neighbors , HyperCube::Direction zDir , int zOff )
+			{
+				const TreeOctNode* node = neighbors.neighbors[1][1][1+zOff];
+				int i = node->nodeData.nodeIndex;
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 2 > _f ; _f<HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() ; _f++ )
+				{
+					bool owner = true;
+
+					typename HyperCube::Cube< Dim >::template Element< 2 > f( zDir , _f.index );				
+					typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > my_ic = HyperCubeTables< Dim , 2 >::IncidentCube[f.index];
+
+					for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 2 >() ; ic++ )
+					{
+						unsigned int xx = HyperCubeTables< Dim , 2 >::CellOffset[f.index][ic.index] + zOff;
+						if( neighbors.neighbors.data[xx] && ic<my_ic ){ owner = false ; break; }
+					}
+					if( owner )
+					{
+						int myCount = ( i - sData.nodeOffset ) * HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() + _f.index;
+						sData._fMap[ myCount ] = 1;
+
+						// Set all the face indices
+						for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 2 >() ; ic++ )
+						{
+							unsigned int xx = HyperCubeTables< Dim , 2 >::CellOffset[f.index][ic.index] + zOff;
+							if( neighbors.neighbors.data[xx] ) sData.faceIndices( neighbors.neighbors.data[xx] )[ HyperCubeTables< Dim , 2 >::IncidentElementCoIndex[f.index][ic.index] ] = myCount;
+						}
+					}
+				}
+			};
+
+			// Process the cross-plane faces
+			auto ProcessXFaces = []( XSliceTableData& xData , const ConstNeighbors& neighbors )
+			{
+				const TreeOctNode* node = neighbors.neighbors[1][1][1];
+				int i = node->nodeData.nodeIndex;
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 1 > _e ; _e<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; _e++ )
+				{
+					bool owner = true;
+
+					typename HyperCube::Cube< Dim >::template Element< 2 > f( HyperCube::CROSS , _e.index );				
+					typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > my_ic = HyperCubeTables< Dim , 2 >::IncidentCube[f.index];
+
+					for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 2 >() ; ic++ )
+					{
+						unsigned int xx = HyperCubeTables< Dim , 2 >::CellOffset[f.index][ic.index];
+						if( neighbors.neighbors.data[xx] && ic<my_ic ){ owner = false ; break; }
+					}
+					if( owner )
+					{
+						int myCount = ( i - xData.nodeOffset ) * HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() + _e.index;
+						xData._fMap[ myCount ] = 1;
+
+						// Set all the face indices
+						for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 2 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 2 >() ; ic++ )
+						{
+							unsigned int xx = HyperCubeTables< Dim , 2 >::CellOffset[f.index][ic.index];
+							if( neighbors.neighbors.data[xx] ) xData.faceIndices( neighbors.neighbors.data[xx] )[ HyperCubeTables< Dim , 2 >::IncidentElementCoIndex[f.index][ic.index] ] = myCount;
+						}
+					}
+				}
+			};
+
+
+			// Try and get at the nodes outside of the slab through the neighbor key
+#pragma omp parallel for schedule( guided )
+			for( int i=sNodes.begin(depth,offset) ; i<sNodes.end(depth,offset) ; i++ )
+			{
+				ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+				const TreeOctNode* node = sNodes.treeNodes[i];
+				ConstNeighbors& neighbors = neighborKey.getNeighbors( node );
+				for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( !IsActiveNode< Dim >( neighbors.neighbors[i][j][k] ) ) neighbors.neighbors[i][j][k] = NULL;
+
+				if( sData0 )
+				{
+					ProcessCorners( *sData0 , neighbors , HyperCube::BACK , 0 ) , ProcessIEdges( *sData0 , neighbors , HyperCube::BACK , 0 ) , ProcessIFaces( *sData0 , neighbors , HyperCube::BACK , 0 );
+					const TreeOctNode* _node = neighbors.neighbors[1][1][0];
+					if( _node )
+					{
+						ProcessCorners( *sData0 , neighbors , HyperCube::FRONT , -1 ) , ProcessIEdges( *sData0 , neighbors , HyperCube::FRONT , -1 ) , ProcessIFaces( *sData0 , neighbors , HyperCube::FRONT , -1 );
+						sData0->_processed[ _node->nodeData.nodeIndex - sNodes.begin(depth,offset-1) ] = 1;
+					}
+				}
+				if( sData1 )
+				{
+					ProcessCorners( *sData1 , neighbors , HyperCube::FRONT , 0 ) , ProcessIEdges( *sData1 , neighbors , HyperCube::FRONT , 0 ) , ProcessIFaces( *sData1 , neighbors , HyperCube::FRONT , 0 );
+					const TreeOctNode* _node = neighbors.neighbors[1][1][2];
+					if( _node )
+					{
+						ProcessCorners( *sData1 , neighbors , HyperCube::BACK , 1 ) , ProcessIEdges( *sData1 , neighbors , HyperCube::BACK , 1 ) , ProcessIFaces( *sData1, neighbors , HyperCube::BACK , 1 );
+						sData1->_processed[ _node->nodeData.nodeIndex - sNodes.begin(depth,offset+1) ] = true;
+					}
+				}
+				if( xData ) ProcessXEdges( *xData , neighbors ) , ProcessXFaces( *xData , neighbors );
+			}
+			if( sData0 )
+			{
+				int off = sNodes.begin(depth,offset-1) , size = sNodes.end(depth,offset-1) - sNodes.begin(depth,offset-1);
+#pragma omp parallel for schedule( guided )
+				for( int i=0 ; i<size ; i++ ) if( !sData0->_processed[i] )
+				{
+					ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+					const TreeOctNode* node = sNodes.treeNodes[i+off];
+					ConstNeighbors& neighbors = neighborKey.getNeighbors( node );
+					for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( !IsActiveNode< Dim >( neighbors.neighbors[i][j][k] ) ) neighbors.neighbors[i][j][k] = NULL;
+					ProcessCorners( *sData0 , neighbors , HyperCube::FRONT , 0 ) , ProcessIEdges( *sData0 , neighbors , HyperCube::FRONT , 0 ) , ProcessIFaces( *sData0 , neighbors , HyperCube::FRONT , 0 );
+				}
+			}
+			if( sData1 )
+			{
+				int off = sNodes.begin(depth,offset+1) , size = sNodes.end(depth,offset+1) - sNodes.begin(depth,offset+1);
+#pragma omp parallel for schedule( guided )
+				for( int i=0 ; i<size ; i++ ) if( !sData1->_processed[i] )
+				{
+					ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+					const TreeOctNode* node = sNodes.treeNodes[i+off];
+					ConstNeighbors& neighbors = neighborKey.getNeighbors( node );
+					for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( !IsActiveNode< Dim >( neighbors.neighbors[i][j][k] ) ) neighbors.neighbors[i][j][k] = NULL;
+					ProcessCorners( *sData1 , neighbors , HyperCube::BACK , 0 ) , ProcessIEdges( *sData1 , neighbors , HyperCube::BACK , 0 ) , ProcessIFaces( *sData1 , neighbors , HyperCube::BACK , 0 );
+				}
+			}
+
+			auto SetICounts = [&]( SliceTableData& sData )
+			{
+				int cCount = 0 , eCount = 0 , fCount = 0;
+
+				for( int i=0 ; i<sData.nodeCount * (int)HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; i++ ) if( sData._cMap[i] ) sData._cMap[i] = cCount++;
+				for( int i=0 ; i<sData.nodeCount * (int)HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; i++ ) if( sData._eMap[i] ) sData._eMap[i] = eCount++;
+				for( int i=0 ; i<sData.nodeCount * (int)HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() ; i++ ) if( sData._fMap[i] ) sData._fMap[i] = fCount++;
+#pragma omp parallel for
+				for( int i=0 ; i<sData.nodeCount ; i++ )
+				{
+					for( unsigned int j=0 ; j<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; j++ ) sData.cTable[i][j] = sData._cMap[ sData.cTable[i][j] ];
+					for( unsigned int j=0 ; j<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; j++ ) sData.eTable[i][j] = sData._eMap[ sData.eTable[i][j] ];
+					for( unsigned int j=0 ; j<HyperCube::Cube< Dim-1 >::template ElementNum< 2 >() ; j++ ) sData.fTable[i][j] = sData._fMap[ sData.fTable[i][j] ];
+				}
+				sData.cCount = cCount , sData.eCount = eCount , sData.fCount = fCount;
+			};
+			auto SetXCounts = [&]( XSliceTableData& xData )
+			{
+				int eCount = 0 , fCount = 0;
+
+				for( int i=0 ; i<xData.nodeCount * (int)HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; i++ ) if( xData._eMap[i] ) xData._eMap[i] = eCount++;
+				for( int i=0 ; i<xData.nodeCount * (int)HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; i++ ) if( xData._fMap[i] ) xData._fMap[i] = fCount++;
+#pragma omp parallel for
+				for( int i=0 ; i<xData.nodeCount ; i++ )
+				{
+					for( unsigned int j=0 ; j<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; j++ ) xData.eTable[i][j] = xData._eMap[ xData.eTable[i][j] ];
+					for( unsigned int j=0 ; j<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; j++ ) xData.fTable[i][j] = xData._fMap[ xData.fTable[i][j] ];
+				}
+				xData.eCount = eCount , xData.fCount = fCount;
+			};
+
+			if( sData0 ) SetICounts( *sData0 );
+			if( sData1 ) SetICounts( *sData1 );
+			if( xData  ) SetXCounts( *xData  );
+		}
+	};
+
+
+	//////////////////
+	// _SliceValues //
+	//////////////////
+	struct _SliceValues	// Working storage for one slice: corner samples, edge/face flags and keys, per-node MC indices, and the maps filled from per-thread staging lists.
+	{
+		typename SliceData::SliceTableData sliceData;
+		Pointer( Real ) cornerValues ; Pointer( Point< Real , Dim > ) cornerGradients ; Pointer( char ) cornerSet;
+		Pointer( long long ) edgeKeys ; Pointer( char ) edgeSet;
+		Pointer( _FaceEdges ) faceEdges ; Pointer( char ) faceSet;
+		Pointer( char ) mcIndices;
+		std::unordered_map< long long , std::vector< _IsoEdge > > faceEdgeMap;
+		std::unordered_map< long long , std::pair< int, Vertex > > edgeVertexMap;
+		std::unordered_map< long long , long long > vertexPairMap;
+		std::vector< std::vector< std::pair< long long , std::vector< _IsoEdge > > > > faceEdgeKeyValues;
+		std::vector< std::vector< std::pair< long long , std::pair< int , Vertex > > > > edgeVertexKeyValues;
+		std::vector< std::vector< std::pair< long long , long long > > > vertexPairKeyValues;
+		// The *KeyValues members hold one staging list per OpenMP thread; the set*Map() methods fold them into the matching maps.
+		_SliceValues( void )
+		{
+			// No buffers are owned yet; reset() allocates them on demand.
+			mcIndices = NullPointer( char );
+			faceEdges = NullPointer( _FaceEdges ) ; faceSet = NullPointer( char );
+			edgeKeys = NullPointer( long long ) ; edgeSet = NullPointer( char );
+			cornerValues = NullPointer( Real ) ; cornerGradients = NullPointer( Point< Real , Dim > ) ; cornerSet = NullPointer( char );
+			_oldNCount = _oldFCount = _oldECount = _oldCCount = 0;
+			const int threadCount = omp_get_max_threads();
+			faceEdgeKeyValues.resize( threadCount ) , edgeVertexKeyValues.resize( threadCount ) , vertexPairKeyValues.resize( threadCount );
+		}
+		~_SliceValues( void )	// Hands every buffer to FreePointer and zeroes the recorded capacities.
+		{
+			FreePointer( mcIndices );
+			FreePointer( faceSet ) ; FreePointer( faceEdges );
+			FreePointer( edgeSet ) ; FreePointer( edgeKeys );
+			FreePointer( cornerSet ) ; FreePointer( cornerGradients ) ; FreePointer( cornerValues );
+			_oldNCount = _oldFCount = _oldECount = _oldCCount = 0;
+		}
+		void setEdgeVertexMap( void )	// Copies every staged (key,(index,vertex)) entry into edgeVertexMap (a repeated key keeps the last value written), then empties the staging lists.
+		{
+			for( auto& staged : edgeVertexKeyValues )
+			{
+				for( const auto& entry : staged ) edgeVertexMap[ entry.first ] = entry.second;
+				staged.clear();
+			}
+		}
+		void setVertexPairMap( void )	// Records every staged key pair in vertexPairMap in both directions, then empties the staging lists.
+		{
+			for( auto& staged : vertexPairKeyValues )
+			{
+				for( const auto& link : staged )
+				{
+					vertexPairMap[ link.first ] = link.second;
+					vertexPairMap[ link.second ] = link.first;
+				}
+				staged.clear();
+			}
+		}
+		void setFaceEdgeMap( void )	// Merges the staged iso-edge lists into faceEdgeMap, appending when the key is already present, then empties the staging lists.
+		{
+			for( auto& staged : faceEdgeKeyValues )
+			{
+				for( const auto& entry : staged )
+				{
+					auto found = faceEdgeMap.find( entry.first );
+					if( found!=faceEdgeMap.end() ) found->second.insert( found->second.end() , entry.second.begin() , entry.second.end() );
+					else faceEdgeMap[ entry.first ] = entry.second;
+				}
+				staged.clear();
+			}
+		}
+		void reset( bool nonLinearFit )	// Prepares the object for the counts currently in sliceData; cornerGradients is only allocated when nonLinearFit is set.
+		{
+			vertexPairMap.clear() , edgeVertexMap.clear() , faceEdgeMap.clear();
+			for( auto& staged : faceEdgeKeyValues ) staged.clear();
+			for( auto& staged : edgeVertexKeyValues ) staged.clear();
+			for( auto& staged : vertexPairKeyValues ) staged.clear();
+			// Buffers are reallocated only when the slice needs more entries than the largest size recorded so far.
+			if( sliceData.nodeCount>_oldNCount )
+			{
+				FreePointer( mcIndices );
+				_oldNCount = sliceData.nodeCount;
+				if( sliceData.nodeCount>0 ) mcIndices = AllocPointer< char >( _oldNCount );
+			}
+			if( sliceData.cCount>_oldCCount )
+			{
+				FreePointer( cornerSet ) ; FreePointer( cornerGradients ) ; FreePointer( cornerValues );
+				_oldCCount = sliceData.cCount;
+				if( sliceData.cCount>0 )
+				{
+					cornerSet = AllocPointer< char >( _oldCCount );
+					cornerValues = AllocPointer< Real >( _oldCCount );
+					if( nonLinearFit ) cornerGradients = AllocPointer< Point< Real , Dim > >( _oldCCount );
+				}
+			}
+			if( sliceData.eCount>_oldECount )
+			{
+				FreePointer( edgeSet ) ; FreePointer( edgeKeys );
+				_oldECount = sliceData.eCount;
+				edgeSet = AllocPointer< char >( _oldECount );
+				edgeKeys = AllocPointer< long long >( _oldECount );
+			}
+			if( sliceData.fCount>_oldFCount )
+			{
+				FreePointer( faceSet ) ; FreePointer( faceEdges );
+				_oldFCount = sliceData.fCount;
+				faceSet = AllocPointer< char >( _oldFCount );
+				faceEdges = AllocPointer< _FaceEdges >( _oldFCount );
+			}
+			// Zero the "already set" flags for the entries this slice uses.
+			if( sliceData.fCount>0 ) memset(   faceSet , 0 , sizeof( char ) * sliceData.fCount );
+			if( sliceData.eCount>0 ) memset(   edgeSet , 0 , sizeof( char ) * sliceData.eCount );
+			if( sliceData.cCount>0 ) memset( cornerSet , 0 , sizeof( char ) * sliceData.cCount );
+		}
+	protected:
+		int _oldCCount , _oldECount , _oldFCount , _oldNCount;	// largest corner / edge / face / node counts the buffers were allocated for
+	};
+
+	///////////////////
+	// _XSliceValues //
+	///////////////////
+	struct _XSliceValues	// Working storage for the edges and faces described by xSliceData: flags, keys, and the maps filled from per-thread staging lists.
+	{
+		typename SliceData::XSliceTableData xSliceData;
+		Pointer( long long ) edgeKeys ; Pointer( char ) edgeSet;
+		Pointer( _FaceEdges ) faceEdges ; Pointer( char ) faceSet;
+		std::unordered_map< long long , std::vector< _IsoEdge > > faceEdgeMap;
+		std::unordered_map< long long , std::pair< int, Vertex > > edgeVertexMap;
+		std::unordered_map< long long , long long > vertexPairMap;
+		std::vector< std::vector< std::pair< long long , std::pair< int , Vertex > > > > edgeVertexKeyValues;
+		std::vector< std::vector< std::pair< long long , long long > > > vertexPairKeyValues;
+		std::vector< std::vector< std::pair< long long , std::vector< _IsoEdge > > > > faceEdgeKeyValues;
+		// The *KeyValues members hold one staging list per OpenMP thread; the set*Map() methods fold them into the matching maps.
+		_XSliceValues( void )
+		{
+			_oldFCount = _oldECount = 0;
+			faceEdges = NullPointer( _FaceEdges ) ; faceSet = NullPointer( char );
+			edgeKeys = NullPointer( long long ) ; edgeSet = NullPointer( char );
+			const int threadCount = omp_get_max_threads();
+			faceEdgeKeyValues.resize( threadCount ) , edgeVertexKeyValues.resize( threadCount );
+			vertexPairKeyValues.resize( threadCount );
+		}
+		~_XSliceValues( void )	// Hands every buffer to FreePointer and zeroes the recorded capacities.
+		{
+			FreePointer( faceSet ) ; FreePointer( faceEdges );
+			FreePointer( edgeSet ) ; FreePointer( edgeKeys );
+			_oldFCount = _oldECount = 0;
+		}
+		void setEdgeVertexMap( void )	// Copies every staged (key,(index,vertex)) entry into edgeVertexMap (a repeated key keeps the last value written), then empties the staging lists.
+		{
+			for( auto& staged : edgeVertexKeyValues )
+			{
+				for( const auto& entry : staged ) edgeVertexMap[ entry.first ] = entry.second;
+				staged.clear();
+			}
+		}
+		void setVertexPairMap( void )	// Records every staged key pair in vertexPairMap in both directions, then empties the staging lists.
+		{
+			for( auto& staged : vertexPairKeyValues )
+			{
+				for( const auto& link : staged )
+				{
+					vertexPairMap[ link.first ] = link.second;
+					vertexPairMap[ link.second ] = link.first;
+				}
+				staged.clear();
+			}
+		}
+		void setFaceEdgeMap( void )	// Merges the staged iso-edge lists into faceEdgeMap, appending when the key is already present, then empties the staging lists.
+		{
+			for( auto& staged : faceEdgeKeyValues )
+			{
+				for( const auto& entry : staged )
+				{
+					auto found = faceEdgeMap.find( entry.first );
+					if( found!=faceEdgeMap.end() ) found->second.insert( found->second.end() , entry.second.begin() , entry.second.end() );
+					else faceEdgeMap[ entry.first ] = entry.second;
+				}
+				staged.clear();
+			}
+		}
+		void reset( void )	// Prepares the object for the counts currently in xSliceData.
+		{
+			vertexPairMap.clear() , edgeVertexMap.clear() , faceEdgeMap.clear();
+			for( auto& staged : faceEdgeKeyValues ) staged.clear();
+			for( auto& staged : edgeVertexKeyValues ) staged.clear();
+			for( auto& staged : vertexPairKeyValues ) staged.clear();
+			// Buffers are reallocated only when more entries are needed than the largest size recorded so far.
+			if( xSliceData.eCount>_oldECount )
+			{
+				FreePointer( edgeSet ) ; FreePointer( edgeKeys );
+				_oldECount = xSliceData.eCount;
+				edgeSet = AllocPointer< char >( _oldECount );
+				edgeKeys = AllocPointer< long long >( _oldECount );
+			}
+			if( xSliceData.fCount>_oldFCount )
+			{
+				FreePointer( faceSet ) ; FreePointer( faceEdges );
+				_oldFCount = xSliceData.fCount;
+				faceSet = AllocPointer< char >( _oldFCount );
+				faceEdges = AllocPointer< _FaceEdges >( _oldFCount );
+			}
+			if( xSliceData.fCount>0 ) memset( faceSet , 0 , sizeof( char ) * xSliceData.fCount );	// zero the "already set" flags in use
+			if( xSliceData.eCount>0 ) memset( edgeSet , 0 , sizeof( char ) * xSliceData.eCount );
+		}
+
+	protected:
+		int _oldECount , _oldFCount;	// largest edge / face counts the buffers were allocated for
+	};
+
+	/////////////////
+	// _SlabValues //
+	/////////////////
+	struct _SlabValues	// Holds two _SliceValues and two _XSliceValues; each accessor picks one by the low bit (idx&1) of the requested index.
+	{
+	protected:
+		_XSliceValues _xSliceValues[2];
+		_SliceValues _sliceValues[2];
+	public:
+		_SliceValues& sliceValues( int idx ){ const int parity = idx&1 ; return _sliceValues[parity]; }
+		const _SliceValues& sliceValues( int idx ) const { const int parity = idx&1 ; return _sliceValues[parity]; }
+		_XSliceValues& xSliceValues( int idx ){ const int parity = idx&1 ; return _xSliceValues[parity]; }
+		const _XSliceValues& xSliceValues( int idx ) const { const int parity = idx&1 ; return _xSliceValues[parity]; }
+	};
+
+	template< unsigned int ... FEMSigs >
+	static void _SetSliceIsoCorners( const FEMTree< Dim , Real >& tree , ConstPointer( Real ) coefficients , ConstPointer( Real ) coarseCoefficients , Real isoValue , LocalDepth depth , int slice , std::vector< _SlabValues >& slabValues , const _Evaluator< UIntPack< FEMSigs ... > , 1 >& evaluator )
+	{	// Slice `slice` is shared by slab slice-1 (through its FRONT side) and slab `slice` (through its BACK side); each slab whose index lies in [0,1<<depth) is processed, FRONT first.
+		if( 0<slice          ) _SetSliceIsoCorners< FEMSigs ... >( tree , coefficients , coarseCoefficients , isoValue , depth , slice , HyperCube::FRONT , slabValues , evaluator );
+		if( (1<<depth)>slice ) _SetSliceIsoCorners< FEMSigs ... >( tree , coefficients , coarseCoefficients , isoValue , depth , slice , HyperCube::BACK  , slabValues , evaluator );
+	}
+	template< unsigned int ... FEMSigs >
+	static void _SetSliceIsoCorners( const FEMTree< Dim , Real >& tree , ConstPointer( Real ) coefficients , ConstPointer( Real ) coarseCoefficients , Real isoValue , LocalDepth depth , int slice , HyperCube::Direction zDir , std::vector< _SlabValues >& slabValues , const _Evaluator< UIntPack< FEMSigs ... > , 1 >& evaluator )
+	{	// For the nodes without active children in slab `slice` (zDir==BACK) or slab `slice-1` (otherwise): evaluates the corner values on the zDir side (plus gradients when sValues.cornerGradients is allocated), caches them in sValues, copies them to ancestors sharing the corner, and stores each node's MC index.
+		static const unsigned int FEMDegrees[] = { FEMSignature< FEMSigs >::Degree ... };
+		_SliceValues& sValues = slabValues[depth].sliceValues( slice );
+		bool useBoundaryEvaluation = false;	// true -> evaluate through bNeighborKeys (corner-support keys) instead of neighborKeys (point-support keys)
+		for( int d=0 ; d<Dim ; d++ ) if( FEMDegrees[d]==0 || ( FEMDegrees[d]==1 && sValues.cornerGradients ) ) useBoundaryEvaluation = true;	// set when some degree is 0, or some degree is 1 while gradients are stored
+		std::vector< ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > > > neighborKeys( omp_get_max_threads() );	// one key per OpenMP thread
+		std::vector< ConstCornerSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > > > bNeighborKeys( omp_get_max_threads() );
+		if( useBoundaryEvaluation ) for( size_t i=0 ; i<neighborKeys.size() ; i++ ) bNeighborKeys[i].set( tree._localToGlobal( depth ) );	// only the key family that will be used is set up
+		else                        for( size_t i=0 ; i<neighborKeys.size() ; i++ )  neighborKeys[i].set( tree._localToGlobal( depth ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i<tree._sNodesEnd(depth,slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			Real squareValues[ HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ];	// this node's corner values on the slice, indexed by square corner
+			ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			ConstCornerSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bNeighborKey = bNeighborKeys[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			if( !IsActiveNode< Dim >( leaf->children ) )	// nodes with active children are skipped
+			{
+				const typename SliceData::SquareCornerIndices& cIndices = sValues.sliceData.cornerIndices( leaf );
+				// The interior-support test is made on the parent and forwarded to _getCornerValues below.
+				bool isInterior = tree._isInteriorlySupported( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , leaf->parent );
+				if( useBoundaryEvaluation ) bNeighborKey.getNeighbors( leaf );
+				else                         neighborKey.getNeighbors( leaf );
+				// Visit the node's corners on the zDir side: the cube corner c is built from zDir and the square corner _c.
+				for( typename HyperCube::Cube< Dim-1 >::template Element< 0 > _c ; _c<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; _c++ )
+				{
+					typename HyperCube::Cube< Dim >::template Element< 0 > c( zDir , _c.index );
+					int vIndex = cIndices[_c.index];
+					if( !sValues.cornerSet[vIndex] )	// not cached yet; NOTE(review): tested without synchronisation, so threads sharing a corner may both evaluate it (presumably to the same value)
+					{
+						if( sValues.cornerGradients )
+						{
+							CumulativeDerivativeValues< Real , Dim , 1 > p;
+							if( useBoundaryEvaluation ) p = tree.template _getCornerValues< Real , 1 >( bNeighborKey , leaf , c.index , coefficients , coarseCoefficients , evaluator , tree._maxDepth , isInterior );
+							else                        p = tree.template _getCornerValues< Real , 1 >(  neighborKey , leaf , c.index , coefficients , coarseCoefficients , evaluator , tree._maxDepth , isInterior );
+							sValues.cornerValues[vIndex] = p[0] , sValues.cornerGradients[vIndex] = Point< Real , Dim >( p[1] , p[2] , p[3] );	// p[0] is stored as the value, p[1..3] as the gradient
+						}
+						else
+						{
+							if( useBoundaryEvaluation ) sValues.cornerValues[vIndex] = tree.template _getCornerValues< Real , 0 >( bNeighborKey , leaf , c.index , coefficients , coarseCoefficients , evaluator , tree._maxDepth , isInterior )[0];
+							else                        sValues.cornerValues[vIndex] = tree.template _getCornerValues< Real , 0 >(  neighborKey , leaf , c.index , coefficients , coarseCoefficients , evaluator , tree._maxDepth , isInterior )[0];
+						}
+						sValues.cornerSet[vIndex] = 1;
+					}
+					squareValues[_c.index] = sValues.cornerValues[ vIndex ];
+					TreeNode* node = leaf;
+					LocalDepth _depth = depth;
+					int _slice = slice;
+					while( tree._isValidSpaceNode( node->parent ) && (node-node->parent->children)==c.index )	// climb while the node is its parent's child number c.index (presumably so that the parent shares this corner)
+					{
+						node = node->parent , _depth-- , _slice >>= 1;	// parent node, one depth coarser, slice index halved
+						_SliceValues& _sValues = slabValues[_depth].sliceValues( _slice );
+						const typename SliceData::SquareCornerIndices& _cIndices = _sValues.sliceData.cornerIndices( node );
+						int _vIndex = _cIndices[_c.index];
+						_sValues.cornerValues[_vIndex] = sValues.cornerValues[vIndex];	// copy the cached sample into the coarser slice
+						if( _sValues.cornerGradients ) _sValues.cornerGradients[_vIndex] = sValues.cornerGradients[vIndex];
+						_sValues.cornerSet[_vIndex] = 1;
+					}
+				}
+				sValues.mcIndices[ i - sValues.sliceData.nodeOffset ] = HyperCube::Cube< Dim-1 >::MCIndex( squareValues , isoValue );	// MC index of the node's slice face, from its corner values and isoValue
+			}
+		}
+	}
+
+	/////////////////
+	// _VertexData //
+	/////////////////
+	class _VertexData	// Static helpers that pack Dim integer coordinates into one long long key identifying an edge or a face of a tree node.
+	{
+	public:
+		static const int VERTEX_COORDINATE_SHIFT = ( sizeof( long long ) * 8 ) / Dim;	// bits reserved per coordinate
+		static long long Index( const int index[Dim] ){ long long packed=0 ; for( int axis=0 ; axis<Dim ; axis++ ) packed |= ( ( long long )index[axis] )<<( axis*VERTEX_COORDINATE_SHIFT ) ; return packed; }
+		// Key of edge e of node; the unpacked coordinates are written to idx. NOTE(review): the %3 arithmetic below assumes Dim==3.
+		static long long EdgeIndex( const TreeNode* node , typename HyperCube::Cube< Dim >::template Element< 1 > e , int maxDepth , int idx[Dim] )
+		{
+			const HyperCube::Direction* dirs = SliceData::template HyperCubeTables< Dim , 1 >::Directions[ e.index ];
+			int d , off[Dim];
+			node->depthAndOffset( d , off );
+			for( int axis=0 ; axis<Dim ; axis++ )
+			{
+				if( dirs[axis]!=HyperCube::CROSS ) continue;
+				// Along the CROSS axis: corner 1 of offset 2*off at depth d+1 (presumably the edge midpoint); on the other two axes: the node corner chosen by BACK/FRONT.
+				const int w = (axis+0)%3 , u = (axis+1)%3 , v = (axis+2)%3;
+				idx[w] = BinaryNode::CornerIndex( maxDepth+1 , d+1 , off[w]<<1 , 1 );
+				idx[u] = BinaryNode::CornerIndex( maxDepth+1 , d   , off[u] , dirs[u]==HyperCube::BACK ? 0 : 1 );
+				idx[v] = BinaryNode::CornerIndex( maxDepth+1 , d   , off[v] , dirs[v]==HyperCube::BACK ? 0 : 1 );
+			}
+			return Index( idx );
+		}
+		static long long EdgeIndex( const TreeNode* node , typename HyperCube::Cube< Dim >::template Element< 1 > e , int maxDepth ){ int scratch[Dim] ; return EdgeIndex( node , e , maxDepth , scratch ); }
+		// Key of face f of node; the unpacked coordinates are written to idx.
+		static long long FaceIndex( const TreeNode* node , typename HyperCube::Cube< Dim >::template Element< Dim-1 > f , int maxDepth , int idx[Dim] )
+		{
+			const HyperCube::Direction* dirs = SliceData::template HyperCubeTables< Dim , 2 >::Directions[ f.index ];
+			int d , o[Dim];
+			node->depthAndOffset( d , o );
+			for( int axis=0 ; axis<Dim ; axis++ )
+				idx[axis] = dirs[axis]==HyperCube::CROSS ? BinaryNode::CornerIndex( maxDepth+1 , d+1 , o[axis]<<1 , 1 )
+				                                        : BinaryNode::CornerIndex( maxDepth+1 , d   , o[axis]    , dirs[axis]==HyperCube::BACK ? 0 : 1 );
+			return Index( idx );
+		}
+		static long long FaceIndex( const TreeNode* node , typename HyperCube::Cube< Dim >::template Element< Dim-1 > f , int maxDepth ){ int scratch[Dim] ; return FaceIndex( node , f , maxDepth , scratch ); }
+	};
+
+	template< unsigned int WeightDegree , typename Data , unsigned int DataSig >
+	static void _SetSliceIsoVertices( const FEMTree< Dim , Real >& tree , typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , Real isoValue , LocalDepth depth , int slice , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues >& slabValues , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex )
+	{	// Slice `slice` is shared by slab slice-1 (through its FRONT side) and slab `slice` (through its BACK side); each slab whose index lies in [0,1<<depth) is processed, FRONT first (vOffset numbering follows that order).
+		if( 0<slice          ) _SetSliceIsoVertices< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , depth , slice , HyperCube::FRONT , vOffset , mesh , slabValues , SetVertex );
+		if( (1<<depth)>slice ) _SetSliceIsoVertices< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , depth , slice , HyperCube::BACK  , vOffset , mesh , slabValues , SetVertex );
+	}
+	template< unsigned int WeightDegree , typename Data , unsigned int DataSig >
+	static void _SetSliceIsoVertices( const FEMTree< Dim , Real >& tree , typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , Real isoValue , LocalDepth depth , int slice , HyperCube::Direction zDir , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues >& slabValues , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex )
+	{	// For the nodes without active children in slab `slice` (zDir==BACK) or slab `slice-1` (otherwise): creates an iso-vertex on every zDir-side slice edge whose MC index reports a root, appends it to `mesh` under index vOffset (then increments vOffset), and stages (key,(index,vertex)) for this slice and, where needed, for coarser slices.
+		static const unsigned int DataDegree = FEMSignature< DataSig >::Degree;
+		_SliceValues& sValues = slabValues[depth].sliceValues( slice );
+		// [WARNING] In the case Degree=2, these two keys are the same, so we don't have to maintain them separately.
+		std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );	// one key of each kind per OpenMP thread
+		std::vector< ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > > > weightKeys( omp_get_max_threads() );
+		std::vector< ConstPointSupportKey< IsotropicUIntPack< Dim , DataDegree > > > dataKeys( omp_get_max_threads() );
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( tree._localToGlobal( depth ) ) , weightKeys[i].set( tree._localToGlobal( depth ) ) , dataKeys[i].set( tree._localToGlobal( depth ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i<tree._sNodesEnd(depth,slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			ConstOneRingNeighborKey& neighborKey =  neighborKeys[ omp_get_thread_num() ];
+			ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey = weightKeys[ omp_get_thread_num() ];
+			ConstPointSupportKey< IsotropicUIntPack< Dim , DataDegree > >& dataKey = dataKeys[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			if( !IsActiveNode< Dim >( leaf->children ) )	// nodes with active children are skipped
+			{
+				int idx = i - sValues.sliceData.nodeOffset;	// position of this node within the slice tables
+				const typename SliceData::SquareEdgeIndices& eIndices = sValues.sliceData.edgeIndices( leaf );
+				if( HyperCube::Cube< Dim-1 >::HasMCRoots( sValues.mcIndices[idx] ) )	// skip faces whose MC index reports no root
+				{
+					neighborKey.getNeighbors( leaf );
+					if( densityWeights ) weightKey.getNeighbors( leaf );	// the weight and data keys are only filled when the corresponding input was supplied
+					if( data ) dataKey.getNeighbors( leaf );
+					// Visit each edge of the node's slice face whose own MC index reports a root.
+					for( typename HyperCube::Cube< Dim-1 >::template Element< 1 > _e ; _e<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; _e++ )
+						if( HyperCube::Cube< 1 >::HasMCRoots( HyperCube::Cube< Dim-1 >::ElementMCIndex( _e , sValues.mcIndices[idx] ) ) )
+						{
+							typename HyperCube::Cube< Dim >::template Element< 1 > e( zDir , _e.index );	// the same edge as an element of the full cube
+							int vIndex = eIndices[_e.index];
+							if( !sValues.edgeSet[vIndex] )	// unsynchronised first test; repeated inside the critical section below
+							{
+								Vertex vertex;
+								long long key = _VertexData::EdgeIndex( leaf , e , tree._localToGlobal( tree._maxDepth ) );
+								_GetIsoVertex< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , weightKey , dataKey , leaf , _e , zDir , sValues , vertex , SetVertex );	// computed before entering the critical section
+								bool stillOwner = false;	// becomes true only for the thread that actually emits the vertex
+								std::pair< int , Vertex > hashed_vertex;
+#pragma omp critical (add_point_access)
+								if( !sValues.edgeSet[vIndex] )	// the first thread to arrive emits the vertex and takes the next index
+								{
+									mesh.addOutOfCorePoint( vertex );
+									sValues.edgeSet[ vIndex ] = 1;
+									hashed_vertex = std::pair< int , Vertex >( vOffset , vertex );
+									sValues.edgeKeys[ vIndex ] = key;
+									vOffset++;
+									stillOwner = true;
+								}
+								if( stillOwner ) sValues.edgeVertexKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::pair< int , Vertex > >( key , hashed_vertex ) );	// staged in this thread's list
+								if( stillOwner )
+								{
+									// We only need to pass the iso-vertex on to coarser slices if the edge it lies on is adjacent to a coarser leaf
+									auto IsNeeded = [&]( unsigned int depth )	// NOTE: this parameter shadows the enclosing function's `depth`
+									{
+										bool isNeeded = false;
+										typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > my_ic = SliceData::template HyperCubeTables< Dim , 1 >::IncidentCube[e.index];
+										for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ ) if( ic!=my_ic )	// every other cube incident on the edge
+										{
+											unsigned int xx = SliceData::template HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index];
+											isNeeded |= !tree._isValidSpaceNode( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx] );	// needed if any such neighbour at this depth is not a valid space node
+										}
+										return isNeeded;
+									};
+									if( IsNeeded( depth ) )
+									{
+										const typename HyperCube::Cube< Dim >::template Element< Dim-1 > *f = SliceData::template HyperCubeTables< Dim , 1 , Dim-1 >::OverlapElements[e.index];	// presumably the two faces that contain the edge
+										for( int k=0 ; k<2 ; k++ )
+										{
+											TreeNode* node = leaf;
+											LocalDepth _depth = depth;
+											int _slice = slice;
+											while( tree._isValidSpaceNode( node->parent ) && SliceData::template HyperCubeTables< Dim , 2 , 0 >::Overlap[f[k].index][(unsigned int)(node-node->parent->children) ] )	// climb while the Overlap table pairs face f[k] with the node's child index
+											{
+												node = node->parent , _depth-- , _slice >>= 1;	// parent node, one depth coarser, slice index halved
+												_SliceValues& _sValues = slabValues[_depth].sliceValues( _slice );
+												_sValues.edgeVertexKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::pair< int , Vertex > >( key , hashed_vertex ) );
+												if( !IsNeeded( _depth ) ) break;	// stop once every other incident neighbour at this depth is a valid space node
+											}
+										}
+									}
+								}
+							}
+						}
+				}
+			}
+		}
+	}
+
+	////////////////////
+	// Iso-Extraction //
+	////////////////////
+	template< unsigned int WeightDegree , typename Data , unsigned int DataSig >
+	static void _SetXSliceIsoVertices( const FEMTree< Dim , Real >& tree , typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , Real isoValue , LocalDepth depth , int slab , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues >& slabValues , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex )
+	{
+		static const unsigned int DataDegree = FEMSignature< DataSig >::Degree;
+		_SliceValues& bValues = slabValues[depth].sliceValues ( slab   );
+		_SliceValues& fValues = slabValues[depth].sliceValues ( slab+1 );
+		_XSliceValues& xValues = slabValues[depth].xSliceValues( slab   );
+
+		// [WARNING] In the case Degree=2, these two keys are the same, so we don't have to maintain them separately.
+		std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+		std::vector< ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > > > weightKeys( omp_get_max_threads() );
+		std::vector< ConstPointSupportKey< IsotropicUIntPack< Dim , DataDegree > > > dataKeys( omp_get_max_threads() );
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( tree._localToGlobal( depth ) ) , weightKeys[i].set( tree._localToGlobal( depth ) ) , dataKeys[i].set( tree._localToGlobal( depth ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slab) ; i<tree._sNodesEnd(depth,slab) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			ConstOneRingNeighborKey& neighborKey =  neighborKeys[ omp_get_thread_num() ];
+			ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey = weightKeys[ omp_get_thread_num() ];
+			ConstPointSupportKey< IsotropicUIntPack< Dim , DataDegree > >& dataKey = dataKeys[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			if( !IsActiveNode< Dim >( leaf->children ) )
+			{
+				unsigned char mcIndex = ( bValues.mcIndices[ i - bValues.sliceData.nodeOffset ] ) | ( fValues.mcIndices[ i - fValues.sliceData.nodeOffset ] )<<4;
+				const typename SliceData::SquareCornerIndices& eIndices = xValues.xSliceData.edgeIndices( leaf );
+				if( HyperCube::Cube< Dim >::HasMCRoots( mcIndex ) )
+				{
+					neighborKey.getNeighbors( leaf );
+					if( densityWeights ) weightKey.getNeighbors( leaf );
+					if( data ) dataKey.getNeighbors( leaf );
+					for( typename HyperCube::Cube< Dim-1 >::template Element< 0 > _c ; _c<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; _c++ )
+					{
+						typename HyperCube::Cube< Dim >::template Element< 1 > e( HyperCube::CROSS , _c.index );
+						unsigned int _mcIndex = HyperCube::Cube< Dim >::ElementMCIndex( e , mcIndex );
+						if( HyperCube::Cube< 1 >::HasMCRoots( _mcIndex ) )
+						{
+							int vIndex = eIndices[_c.index];
+							if( !xValues.edgeSet[vIndex] )
+							{
+								Vertex vertex;
+								long long key = _VertexData::EdgeIndex( leaf , e.index , tree._localToGlobal( tree._maxDepth ) );
+								_GetIsoVertex< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , weightKey , dataKey , leaf , _c , bValues , fValues , vertex , SetVertex );
+								bool stillOwner = false;
+								std::pair< int , Vertex > hashed_vertex;
+#pragma omp critical (add_point_access)
+								if( !xValues.edgeSet[vIndex] )
+								{
+									mesh.addOutOfCorePoint( vertex );
+									xValues.edgeSet[ vIndex ] = 1;
+									hashed_vertex = std::pair< int , Vertex >( vOffset , vertex );
+									xValues.edgeKeys[ vIndex ] = key;
+									vOffset++;
+									stillOwner = true;
+								}
+								if( stillOwner ) xValues.edgeVertexKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::pair< int , Vertex > >( key , hashed_vertex ) );
+								if( stillOwner )
+								{
+									// We only need to pass the iso-vertex down if the edge it lies on is adjacent to a coarser leaf
+									auto IsNeeded = [&]( unsigned int depth )
+									{
+										bool isNeeded = false;
+										typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > my_ic = SliceData::template HyperCubeTables< Dim , 1 >::IncidentCube[e.index];
+										for( typename HyperCube::Cube< Dim >::template IncidentCubeIndex< 1 > ic ; ic<HyperCube::Cube< Dim >::template IncidentCubeNum< 1 >() ; ic++ ) if( ic!=my_ic )
+										{
+											unsigned int xx = SliceData::template HyperCubeTables< Dim , 1 >::CellOffset[e.index][ic.index];
+											isNeeded |= !tree._isValidSpaceNode( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx] );
+										}
+										return isNeeded;
+									};
+									if( IsNeeded( depth ) )
+									{
+										const typename HyperCube::Cube< Dim >::template Element< Dim-1 > *f = SliceData::template HyperCubeTables< Dim , 1 , Dim-1 >::OverlapElements[e.index];
+										for( int k=0 ; k<2 ; k++ )
+										{
+											TreeNode* node = leaf;
+											LocalDepth _depth = depth;
+											int _slab = slab;
+											while( tree._isValidSpaceNode( node->parent ) && SliceData::template HyperCubeTables< Dim , 2 , 0 >::Overlap[f[k].index][(unsigned int)(node-node->parent->children) ] )
+											{
+												node = node->parent , _depth-- , _slab >>= 1;
+												_XSliceValues& _xValues = slabValues[_depth].xSliceValues( _slab );
+												_xValues.edgeVertexKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::pair< int , Vertex > >( key , hashed_vertex ) );
+												if( !IsNeeded( _depth ) ) break;
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	static void _CopyFinerSliceIsoEdgeKeys( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slice , std::vector< _SlabValues >& slabValues )
+	{	// Runs the directional overload once per side of the slice: FRONT only when slice>0, BACK only while slice<(1<<depth).
+		if( 0<slice          ) { _CopyFinerSliceIsoEdgeKeys( tree , depth , slice , HyperCube::FRONT , slabValues ); }
+		if( (1<<depth)>slice ) { _CopyFinerSliceIsoEdgeKeys( tree , depth , slice , HyperCube::BACK  , slabValues ); }
+	}
+	static void _CopyFinerSliceIsoEdgeKeys( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slice , HyperCube::Direction zDir , std::vector< _SlabValues >& slabValues )
+	{
+		// Lifts iso-vertex keys from the slice at depth+1 (index slice<<1) onto the matching slice at depth.
+		// Visits the refined nodes of the slab whose zDir face lies in the slice; every coarse slice edge that is not yet set
+		// is resolved from the two finer edges reached through the node's children.
+		_SliceValues& coarseValues = slabValues[depth  ].sliceValues( slice    );
+		_SliceValues& fineValues   = slabValues[depth+1].sliceValues( slice<<1 );
+		const int slab = slice - ( zDir==HyperCube::BACK ? 0 : 1 );	// BACK: slab `slice`, otherwise slab `slice-1`
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slab) ; i<tree._sNodesEnd(depth,slab) ; i++ )
+		{
+			TreeNode* coarseNode = tree._sNodes.treeNodes[i];
+			// Only valid nodes with active children have finer edges to copy from
+			if( !tree._isValidSpaceNode( coarseNode ) || !IsActiveNode< Dim >( coarseNode->children ) ) continue;
+			const int threadIdx = omp_get_thread_num();
+			typename SliceData::SquareEdgeIndices& coarseEdges = coarseValues.sliceData.edgeIndices( i );
+			for( typename HyperCube::Cube< Dim-1 >::template Element< 1 > sqEdge ; sqEdge<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; sqEdge++ )
+			{
+				const int coarseIdx = coarseEdges[sqEdge.index];
+				if( coarseValues.edgeSet[ coarseIdx ] ) continue;	// this coarse edge already has its key
+				typename HyperCube::Cube< Dim >::template Element< 1 > cubeEdge( zDir , sqEdge.index );
+				// The children are addressed by the two corners that the overlap table lists for this edge
+				const typename HyperCube::Cube< Dim >::template Element< 0 > *ends = SliceData::template HyperCubeTables< Dim , 1 , 0 >::OverlapElements[cubeEdge.index];
+				TreeNode* child0 = coarseNode->children + ends[0].index;
+				TreeNode* child1 = coarseNode->children + ends[1].index;
+				// [SANITY CHECK] The finer edges are expected to be valid (or invalid) together; skip the edge unless both children are valid
+				if( !tree._isValidSpaceNode( child0 ) || !tree._isValidSpaceNode( child1 ) ) continue;
+				const int fineIdx0 = fineValues.sliceData.edgeIndices( child0 )[sqEdge.index];
+				const int fineIdx1 = fineValues.sliceData.edgeIndices( child1 )[sqEdge.index];
+				if( fineValues.edgeSet[fineIdx0] != fineValues.edgeSet[fineIdx1] )
+				{
+					// The set flags differ: the coarse edge inherits the key of whichever finer edge is set
+					const long long key = fineValues.edgeSet[fineIdx0] ? fineValues.edgeKeys[fineIdx0] : fineValues.edgeKeys[fineIdx1];
+					coarseValues.edgeKeys[coarseIdx] = key;
+					coarseValues.edgeSet[coarseIdx] = 1;
+				}
+				else if( fineValues.edgeSet[fineIdx0] && fineValues.edgeSet[fineIdx1] )
+				{
+					// Both finer edges are set: record the key pair here and on every ancestor slice reached while the overlap table keeps matching
+					const std::pair< long long , long long > keyPair( fineValues.edgeKeys[fineIdx0] , fineValues.edgeKeys[fineIdx1] );
+					coarseValues.vertexPairKeyValues[ threadIdx ].push_back( keyPair );
+					const TreeNode* ancestor = coarseNode;
+					LocalDepth ancestorDepth = depth;
+					int ancestorSlice = slice;
+					while( tree._isValidSpaceNode( ancestor->parent ) && SliceData::template HyperCubeTables< Dim , 1 , 0 >::Overlap[cubeEdge.index][(unsigned int)(ancestor-ancestor->parent->children) ] )
+					{
+						ancestor = ancestor->parent , ancestorDepth-- , ancestorSlice >>= 1;
+						slabValues[ancestorDepth].sliceValues( ancestorSlice ).vertexPairKeyValues[ threadIdx ].push_back( keyPair );
+					}
+				}
+			}
+		}
+	}
+	static void _CopyFinerXSliceIsoEdgeKeys( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slab , std::vector< _SlabValues>& slabValues )
+	{
+		// Lifts the iso-vertex keys of cross-slab edges from depth+1 onto slab `slab` at depth.
+		// Each coarse cross edge is resolved from one finer cross edge in slab 2*slab (via the BACK child)
+		// and one in slab 2*slab+1 (via the FRONT child).
+		_XSliceValues& coarseValues = slabValues[depth  ].xSliceValues( slab );
+		_XSliceValues& fineBack     = slabValues[depth+1].xSliceValues( (slab<<1)|0 );
+		_XSliceValues& fineFront    = slabValues[depth+1].xSliceValues( (slab<<1)|1 );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slab) ; i<tree._sNodesEnd(depth,slab) ; i++ )
+		{
+			TreeNode* coarseNode = tree._sNodes.treeNodes[i];
+			// Only valid nodes with active children have finer edges to copy from
+			if( !tree._isValidSpaceNode( coarseNode ) || !IsActiveNode< Dim >( coarseNode->children ) ) continue;
+			const int threadIdx = omp_get_thread_num();
+			typename SliceData::SquareCornerIndices& coarseEdges = coarseValues.xSliceData.edgeIndices( i );
+			for( typename HyperCube::Cube< Dim-1 >::template Element< 0 > sqCorner ; sqCorner<HyperCube::Cube< Dim-1 >::template ElementNum< 0 >() ; sqCorner++ )
+			{
+				typename HyperCube::Cube< Dim >::template Element< 1 > crossEdge( HyperCube::CROSS , sqCorner.index );
+				const int coarseIdx = coarseEdges[ sqCorner.index ];
+				// Edges that already carry a key are left untouched
+				if( coarseValues.edgeSet[coarseIdx] ) continue;
+				typename HyperCube::Cube< Dim >::template Element< 0 > backCorner( HyperCube::BACK , sqCorner.index ) , frontCorner( HyperCube::FRONT , sqCorner.index );
+				TreeNode* backChild  = coarseNode->children + backCorner.index;
+				TreeNode* frontChild = coarseNode->children + frontCorner.index;
+				// [SANITY CHECK] The finer edges are expected to be valid (or invalid) together; skip the edge unless both children are valid
+				if( !tree._isValidSpaceNode( backChild ) || !tree._isValidSpaceNode( frontChild ) ) continue;
+				const int backIdx  = fineBack.xSliceData.edgeIndices( backChild )[sqCorner.index];
+				const int frontIdx = fineFront.xSliceData.edgeIndices( frontChild )[sqCorner.index];
+				if( fineBack.edgeSet[backIdx] != fineFront.edgeSet[frontIdx] )
+				{
+					// One zero-crossing along the edge: inherit the key of whichever finer edge is set
+					const long long key = fineBack.edgeSet[backIdx] ? fineBack.edgeKeys[backIdx] : fineFront.edgeKeys[frontIdx];
+					coarseValues.edgeKeys[ coarseIdx ] = key;
+					coarseValues.edgeSet[ coarseIdx ] = 1;
+				}
+				else if( fineBack.edgeSet[backIdx] && fineFront.edgeSet[frontIdx] )
+				{
+					// Two zero-crossings along the edge: record the (back,front) key pair here and on the ancestors reached below
+					const std::pair< long long , long long > keyPair( fineBack.edgeKeys[backIdx] , fineFront.edgeKeys[frontIdx] );
+					coarseValues.vertexPairKeyValues[ threadIdx ].push_back( keyPair );
+					const TreeNode* ancestor = coarseNode;
+					LocalDepth ancestorDepth = depth;
+					int ancestorSlab = slab;
+					while( tree._isValidSpaceNode( ancestor->parent ) && SliceData::template HyperCubeTables< Dim , 1 , 0 >::Overlap[crossEdge.index][(unsigned int)(ancestor-ancestor->parent->children) ] )
+					{
+						ancestor = ancestor->parent , ancestorDepth-- , ancestorSlab >>= 1;
+						// NOTE(review): the pair is stored in the ancestor's sliceValues( slab index ), not its xSliceValues, although this edge is a cross edge.
+						// Kept exactly as written -- confirm that this is intentional.
+						_SliceValues& ancestorValues = slabValues[ancestorDepth].sliceValues( ancestorSlab );
+						ancestorValues.vertexPairKeyValues[ threadIdx ].push_back( keyPair );
+					}
+				}
+			}
+		}
+	}
+	static void _SetSliceIsoEdges( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slice , std::vector< _SlabValues >& slabValues )
+	{	// Runs the directional overload once per side of the slice: FRONT only when slice>0, BACK only while slice<(1<<depth).
+		if( 0<slice          ) { _SetSliceIsoEdges( tree , depth , slice , HyperCube::FRONT , slabValues ); }
+		if( (1<<depth)>slice ) { _SetSliceIsoEdges( tree , depth , slice , HyperCube::BACK  , slabValues ); }
+	}
+	static void _SetSliceIsoEdges( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slice , HyperCube::Direction zDir , std::vector< _SlabValues >& slabValues )
+	{ // For the leaves of the slab whose zDir face lies in `slice`: stores the iso-edges of that face and queues them for ancestor faces.
+		_SliceValues& sValues = slabValues[depth].sliceValues( slice );
+		std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() ); // one neighbor key per OpenMP thread
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( tree._localToGlobal( depth ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth, slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i<tree._sNodesEnd(depth,slice-(zDir==HyperCube::BACK ? 0 : 1)) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			int isoEdges[ 2 * HyperCube::MarchingSquares::MAX_EDGES ]; // filled by AddEdgeIndices: two square-edge indices per iso-edge
+			ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			if( !IsActiveNode< Dim >( leaf->children ) ) // only nodes without active children are handled here
+			{
+				int idx = i - sValues.sliceData.nodeOffset;
+				const typename SliceData::SquareEdgeIndices& eIndices = sValues.sliceData.edgeIndices( leaf );
+				const typename SliceData::SquareFaceIndices& fIndices = sValues.sliceData.faceIndices( leaf );
+				unsigned char mcIndex = sValues.mcIndices[idx];
+				if( !sValues.faceSet[ fIndices[0] ] ) // skip faces whose iso-edges were already stored
+				{
+					neighborKey.getNeighbors( leaf );
+					unsigned int xx = WindowIndex< IsotropicUIntPack< Dim , 3 > , IsotropicUIntPack< Dim , 1 > >::Index + (zDir==HyperCube::BACK ? -1 : 1); // one step from Index: -1 for BACK, +1 otherwise (presumably the neighbor across the slice -- confirm against the window layout)
+					if( !IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx] ) || !IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx]->children ) ) // proceed only if that neighbor is inactive or has no active children
+					{
+						_FaceEdges fe;
+						fe.count = HyperCube::MarchingSquares::AddEdgeIndices( mcIndex , isoEdges );
+						for( int j=0 ; j<fe.count ; j++ ) for( int k=0 ; k<2 ; k++ ) // translate both end-points of every iso-edge into stored edge keys
+						{
+							if( !sValues.edgeSet[ eIndices[ isoEdges[2*j+k] ] ] ) ERROR_OUT( "Edge not set: %d / %d" , slice , 1<<depth );
+							fe.edges[j][k] = sValues.edgeKeys[ eIndices[ isoEdges[2*j+k] ] ];
+						}
+						sValues.faceSet[ fIndices[0] ] = 1;
+						sValues.faceEdges[ fIndices[0] ] = fe;
+
+						TreeNode* node = leaf;
+						LocalDepth _depth = depth;
+						int _slice = slice;
+						typename HyperCube::Cube< Dim >::template Element< Dim-1 > f( zDir , 0 );
+						std::vector< _IsoEdge > edges; // copy of the face's iso-edges, queued for each ancestor below
+						edges.resize( fe.count );
+						for( int j=0 ; j<fe.count ; j++ ) edges[j] = fe.edges[j];
+						while( tree._isValidSpaceNode( node->parent ) && SliceData::template HyperCubeTables< Dim , 2 , 0 >::Overlap[f.index][(unsigned int)(node-node->parent->children) ] ) // climb while the parent is valid and the overlap table entry for face f and this child's position is set
+						{
+							node = node->parent , _depth-- , _slice >>= 1;
+							if( IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( _depth ) ].neighbors.data[xx] ) && IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( _depth ) ].neighbors.data[xx]->children ) ) break; // stop once the neighbor at this depth is active and has active children
+							long long key = _VertexData::FaceIndex( node , f , tree._localToGlobal( tree._maxDepth ) );
+							_SliceValues& _sValues = slabValues[_depth].sliceValues( _slice );
+							_sValues.faceEdgeKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::vector< _IsoEdge > >( key , edges ) ); // appended to this thread's list for the ancestor slice
+						}
+					}
+				}
+			}
+		}
+	}
+	static void _SetXSliceIsoEdges( const FEMTree< Dim , Real >& tree , LocalDepth depth , int slab , std::vector< _SlabValues >& slabValues )
+	{ // For the leaves of slab `slab`: stores the iso-edges of each CROSS face and queues them for ancestor faces.
+		_SliceValues& bValues = slabValues[depth].sliceValues ( slab   );
+		_SliceValues& fValues = slabValues[depth].sliceValues ( slab+1 );
+		_XSliceValues& xValues = slabValues[depth].xSliceValues( slab   );
+
+		std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() ); // one neighbor key per OpenMP thread
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( tree._localToGlobal( depth ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,slab) ; i<tree._sNodesEnd(depth,slab) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			int isoEdges[ 2 * HyperCube::MarchingSquares::MAX_EDGES ]; // filled by AddEdgeIndices: two square-edge indices per iso-edge
+			ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			if( !IsActiveNode< Dim >( leaf->children ) ) // only nodes without active children are handled here
+			{
+				const typename SliceData::SquareCornerIndices& cIndices = xValues.xSliceData.edgeIndices( leaf );
+				const typename SliceData::SquareEdgeIndices& eIndices = xValues.xSliceData.faceIndices( leaf );
+				unsigned char mcIndex = ( bValues.mcIndices[ i - bValues.sliceData.nodeOffset ] ) | ( fValues.mcIndices[ i - fValues.sliceData.nodeOffset ]<<4 ); // back-slice index in the low bits, front-slice index shifted up by 4
+				{
+					neighborKey.getNeighbors( leaf );
+					// Iterate over the edges on the back
+					for( typename HyperCube::Cube< Dim-1 >::template Element< 1 > _e ; _e<HyperCube::Cube< Dim-1 >::template ElementNum< 1 >() ; _e++ )
+					{
+						typename HyperCube::Cube< Dim >::template Element< 2 > f( HyperCube::CROSS , _e.index ); // the CROSS face built from in-slice edge _e
+						unsigned char _mcIndex = HyperCube::Cube< Dim >::template ElementMCIndex< 2 >( f , mcIndex );
+
+						unsigned int xx = SliceData::template HyperCubeTables< Dim , 2 >::CellOffsetAntipodal[f.index]; // neighbor-window index from the table (presumably the node across face f -- confirm)
+						if(	!xValues.faceSet[ eIndices[_e.index] ] && ( !IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx] ) || !IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( depth ) ].neighbors.data[xx]->children ) ) ) // face not yet set, and that neighbor is inactive or has no active children
+						{
+							_FaceEdges fe;
+							fe.count = HyperCube::MarchingSquares::AddEdgeIndices( _mcIndex , isoEdges );
+							for( int j=0 ; j<fe.count ; j++ ) for( int k=0 ; k<2 ; k++ ) // translate both end-points of every iso-edge into stored edge keys
+							{
+								typename HyperCube::Cube< Dim >::template Element< 1 > e( f , typename HyperCube::Cube< Dim-1 >::template Element< 1 >( isoEdges[2*j+k] ) );
+								HyperCube::Direction dir ; unsigned int coIndex;
+								e.factor( dir , coIndex ); // splits the cube edge into a direction and an index
+								if( dir==HyperCube::CROSS ) // Cross-edge
+								{
+									int idx = cIndices[ coIndex ];
+									if( !xValues.edgeSet[ idx ] ) ERROR_OUT( "Edge not set: %d / %d" , slab , 1<<depth );
+									fe.edges[j][k] = xValues.edgeKeys[ idx ];
+								}
+								else // edge lies in the back or front slice: read its key from that slice's values
+								{
+									const _SliceValues& sValues = dir==HyperCube::BACK ? bValues : fValues;
+									int idx = sValues.sliceData.edgeIndices(i)[ coIndex ];
+									if( !sValues.edgeSet[ idx ] ) ERROR_OUT( "Edge not set: %d / %d" , slab , 1<<depth );
+									fe.edges[j][k] = sValues.edgeKeys[ idx ];
+								}
+							}
+							xValues.faceSet[ eIndices[_e.index] ] = 1;
+							xValues.faceEdges[ eIndices[_e.index] ] = fe;
+
+							TreeNode* node = leaf;
+							LocalDepth _depth = depth;
+							int _slab = slab;
+							std::vector< _IsoEdge > edges; // copy of the face's iso-edges, queued for each ancestor below
+							edges.resize( fe.count );
+							for( int j=0 ; j<fe.count ; j++ ) edges[j] = fe.edges[j];
+							while( tree._isValidSpaceNode( node->parent ) && SliceData::template HyperCubeTables< Dim , 2 , 0 >::Overlap[f.index][(unsigned int)(node-node->parent->children) ] ) // climb while the parent is valid and the overlap table entry for face f and this child's position is set
+							{
+								node = node->parent , _depth-- , _slab >>= 1;
+								if( IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( _depth ) ].neighbors.data[xx] ) && IsActiveNode< Dim >( neighborKey.neighbors[ tree._localToGlobal( _depth ) ].neighbors.data[xx]->children ) ) break; // stop once the neighbor at this depth is active and has active children
+								long long key = _VertexData::FaceIndex( node , f , tree._localToGlobal( tree._maxDepth ) );
+								_XSliceValues& _xValues = slabValues[_depth].xSliceValues( _slab );
+								_xValues.faceEdgeKeyValues[ omp_get_thread_num() ].push_back( std::pair< long long , std::vector< _IsoEdge > >( key , edges ) ); // appended to this thread's list for the ancestor slab
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	static void _SetIsoSurface( const FEMTree< Dim , Real >& tree , LocalDepth depth , int offset , const _SliceValues& bValues , const _SliceValues& fValues , const _XSliceValues& xValues , CoredMeshData< Vertex >& mesh , bool polygonMesh , bool addBarycenter , int& vOffset , bool flipOrientation )
+	{
+		// For every in-bounds leaf of slab `offset` at `depth`: gathers the iso-edges of its faces, chains them into closed loops, maps the keys to vertices and passes each loop to _AddIsoPolygons (polygon order is reversed when flipOrientation is set).
+		std::vector< std::vector< _IsoEdge > > edgess( omp_get_max_threads() );	// one scratch edge list per OpenMP thread
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(depth,offset) ; i<tree._sNodesEnd(depth,offset) ; i++ ) if( tree._isValidSpaceNode( tree._sNodes.treeNodes[i] ) )
+		{
+			std::vector< _IsoEdge >& edges = edgess[ omp_get_thread_num() ];
+			TreeNode* leaf = tree._sNodes.treeNodes[i];
+			int res = 1<<depth;
+			LocalDepth d ; LocalOffset off;
+			tree._localDepthAndOffset( leaf , d , off );
+			bool inBounds = off[0]>=0 && off[0]<res && off[1]>=0 && off[1]<res && off[2]>=0 && off[2]<res;
+			if( inBounds && !IsActiveNode< Dim >( leaf->children ) )
+			{
+				edges.clear();
+				// The node's marching-cubes index is deliberately not consulted here (the unused local that computed it was removed):
+				// [WARNING] Just because the node looks empty doesn't mean it doesn't get edges from finer neighbors
+				{
+					// Gather the edges from the faces (with the correct orientation)
+					for( typename HyperCube::Cube< Dim >::template Element< Dim-1 > f ; f<HyperCube::Cube< Dim >::template ElementNum< Dim-1 >() ; f++ )
+					{
+						int flip = HyperCube::Cube< Dim >::IsOriented( f ) ? 0 : 1;	// swap the end-points of edges taken from faces that are not oriented
+						HyperCube::Direction fDir = f.direction();
+						if( fDir==HyperCube::BACK || fDir==HyperCube::FRONT )
+						{
+							// Faces lying in the back/front slice: edges stored on the face itself, otherwise those recorded in the slice's face-edge map
+							const _SliceValues& sValues = (fDir==HyperCube::BACK) ? bValues : fValues;
+							int fIdx = sValues.sliceData.faceIndices(i)[0];
+							if( sValues.faceSet[fIdx] )
+							{
+								const _FaceEdges& fe = sValues.faceEdges[ fIdx ];
+								for( int j=0 ; j<fe.count ; j++ ) edges.push_back( _IsoEdge( fe.edges[j][flip] , fe.edges[j][1-flip] ) );
+							}
+							else
+							{
+								long long key = _VertexData::FaceIndex( leaf , f , tree._localToGlobal( tree._maxDepth ) );
+								typename std::unordered_map< long long, std::vector< _IsoEdge > >::const_iterator iter = sValues.faceEdgeMap.find(key);
+								if( iter!=sValues.faceEdgeMap.end() )
+								{
+									const std::vector< _IsoEdge >& _edges = iter->second;
+									for( size_t j=0 ; j<_edges.size() ; j++ ) edges.push_back( _IsoEdge( _edges[j][flip] , _edges[j][1-flip] ) );
+								}
+								else ERROR_OUT( "Invalid faces: %d  %s" , i , fDir==HyperCube::BACK ? "back" : ( fDir==HyperCube::FRONT ? "front" : ( fDir==HyperCube::CROSS ? "cross" : "unknown" ) ) );	// [FIX] the direction name is a string: %s, not %d
+							}
+						}
+						else
+						{
+							// Cross faces: same lookup against the cross-slab values
+							int fIdx = xValues.xSliceData.faceIndices(i)[ f.coIndex() ];
+							if( xValues.faceSet[fIdx] )
+							{
+								const _FaceEdges& fe = xValues.faceEdges[ fIdx ];
+								for( int j=0 ; j<fe.count ; j++ ) edges.push_back( _IsoEdge( fe.edges[j][flip] , fe.edges[j][1-flip] ) );
+							}
+							else
+							{
+								long long key = _VertexData::FaceIndex( leaf , f , tree._localToGlobal( tree._maxDepth ) );
+								typename std::unordered_map< long long , std::vector< _IsoEdge > >::const_iterator iter = xValues.faceEdgeMap.find(key);
+								if( iter!=xValues.faceEdgeMap.end() )
+								{
+									const std::vector< _IsoEdge >& _edges = iter->second;
+									for( size_t j=0 ; j<_edges.size() ; j++ ) edges.push_back( _IsoEdge( _edges[j][flip] , _edges[j][1-flip] ) );
+								}
+								else ERROR_OUT( "Invalid faces: %d  %s" , i , fDir==HyperCube::BACK ? "back" : ( fDir==HyperCube::FRONT ? "front" : ( fDir==HyperCube::CROSS ? "cross" : "unknown" ) ) );
+							}
+						}
+					}
+					// Get the edge loops
+					std::vector< std::vector< long long  > > loops;
+					while( edges.size() )
+					{
+						loops.resize( loops.size()+1 );
+						_IsoEdge edge = edges.back();
+						edges.pop_back();
+						long long start = edge[0] , current = edge[1];
+						while( current!=start )
+						{
+							int idx;
+							for( idx=0 ; idx<(int)edges.size() ; idx++ ) if( edges[idx][0]==current ) break;	// find the edge that starts where the loop currently ends
+							if( idx==(int)edges.size() )
+							{
+								// No edge continues the loop: follow a recorded vertex pair (back, then front, then cross) instead
+								typename std::unordered_map< long long, long long >::const_iterator iter;
+								if     ( (iter=bValues.vertexPairMap.find(current))!=bValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
+								else if( (iter=fValues.vertexPairMap.find(current))!=fValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
+								else if( (iter=xValues.vertexPairMap.find(current))!=xValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
+								else
+								{
+									LocalDepth d ; LocalOffset off;
+									tree._localDepthAndOffset( leaf , d , off );
+									ERROR_OUT( "Failed to close loop [%d: %d %d %d] | (%d): %lld" , d-1 , off[0] , off[1] , off[2] , i , current );
+								}
+							}
+							else
+							{
+								loops.back().push_back( current );
+								current = edges[idx][1];
+								edges[idx] = edges.back() , edges.pop_back();	// remove the consumed edge by overwriting it with the last one
+							}
+						}
+						loops.back().push_back( start );
+					}
+					// Add the loops to the mesh
+					for( size_t j=0 ; j<loops.size() ; j++ )
+					{
+						std::vector< std::pair< int , Vertex > > polygon( loops[j].size() );
+						for( size_t k=0 ; k<loops[j].size() ; k++ )
+						{
+							long long key = loops[j][k];
+							typename std::unordered_map< long long, std::pair< int, Vertex > >::const_iterator iter;
+							size_t kk = flipOrientation ? loops[j].size()-1-k : k;	// fill the polygon back-to-front when the orientation is flipped
+							if     ( ( iter=bValues.edgeVertexMap.find( key ) )!=bValues.edgeVertexMap.end() ) polygon[kk] = iter->second;
+							else if( ( iter=fValues.edgeVertexMap.find( key ) )!=fValues.edgeVertexMap.end() ) polygon[kk] = iter->second;
+							else if( ( iter=xValues.edgeVertexMap.find( key ) )!=xValues.edgeVertexMap.end() ) polygon[kk] = iter->second;
+							else ERROR_OUT( "Couldn't find vertex in edge map" );
+						}
+						_AddIsoPolygons( mesh , polygon , polygonMesh , addBarycenter , vOffset );
+					}
+				}
+			}
+		}
+	}
+
+	template< unsigned int WeightDegree , typename Data , unsigned int DataSig >
+	static bool _GetIsoVertex( const FEMTree< Dim , Real >& tree , typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , Real isoValue , ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , ConstPointSupportKey< IsotropicUIntPack< Dim , FEMSignature< DataSig >::Degree > >& dataKey , const TreeNode* node , typename HyperCube::template Cube< Dim-1 >::template Element< 1 > _e , HyperCube::Direction zDir , const _SliceValues& sValues , Vertex& vertex , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex )
+	{
+		// Computes the iso-vertex on in-slice edge _e of node, in the slice on the zDir side of the node, and hands it to SetVertex.
+		// The crossing is solved from the corner values at the edge's two end-points: a quadratic fit when corner gradients are stored, a linear one otherwise.
+		// Always returns true.
+		static const unsigned int DataDegree = FEMSignature< DataSig >::Degree;
+		const typename HyperCube::Cube< Dim-1 >::template Element< 0 > *edgeEnds = SliceData::template HyperCubeTables< Dim-1 , 1 , 0 >::OverlapElements[_e.index];
+		const int end0 = edgeEnds[0].index , end1 = edgeEnds[1].index;
+
+		const bool useHermite = sValues.cornerGradients!=NullPointer( Point< Real , Dim > );
+		const typename SliceData::SquareCornerIndices& cornerIdx = sValues.sliceData.cornerIndices( node );
+		Real value0 = sValues.cornerValues[ cornerIdx[end0] ] , value1 = sValues.cornerValues[ cornerIdx[end1] ];
+		Point< Real , Dim > vertexPos , nodeCorner;
+		Real edgeStart , nodeWidth;
+		tree._startAndWidth( node , nodeCorner , nodeWidth );
+		int axis;
+		{
+			const HyperCube::Direction* edgeDirs = SliceData::template HyperCubeTables< Dim-1 , 1 >::Directions[ _e.index ];
+			for( int d=0 ; d<Dim-1 ; d++ ) if( edgeDirs[d]==HyperCube::CROSS )
+			{
+				// The edge runs along axis d; the remaining in-slice coordinates are fixed by the edge's BACK/FRONT directions
+				axis = d , edgeStart = nodeCorner[d];
+				for( int dd=1 ; dd<Dim-1 ; dd++ ) vertexPos[(d+dd)%(Dim-1)] = nodeCorner[(d+dd)%(Dim-1)] + nodeWidth * ( edgeDirs[(d+dd)%(Dim-1)]==HyperCube::BACK ? 0 : 1 );
+			}
+		}
+		vertexPos[ Dim-1 ] = nodeCorner[Dim-1] + nodeWidth * ( zDir==HyperCube::BACK ? 0 : 1 );
+
+		double rootParam;
+		bool haveRoot = false;
+		if( useHermite )
+		{
+			double slope0 = sValues.cornerGradients[ cornerIdx[end0] ][axis] * nodeWidth , slope1 = sValues.cornerGradients[ cornerIdx[end1] ][axis] * nodeWidth;
+			// The scaling will turn the Hermite Spline into a quadratic
+			double scale = (value1-value0) / ( (slope1+slope0 ) / 2 );
+			slope0 *= scale , slope1 *= scale;
+			// Hermite Spline
+			Polynomial< 2 > hermite;
+			hermite.coefficients[0] = value0;
+			hermite.coefficients[1] = slope0;
+			hermite.coefficients[2] = 3*(value1-value0)-slope1-2*slope0;
+			double roots[2];
+			int inRange = 0 , rootCount = hermite.getSolutions( isoValue , roots , 0 );
+			rootParam = 0;
+			// Average the roots that fall inside [0,1]
+			for( int r=0 ; r<rootCount ; r++ ) if( roots[r]>=0 && roots[r]<=1 ) { rootParam += roots[r]; inRange++; }
+			haveRoot = ( inRange!=0 );
+			rootParam /= inRange;
+		}
+		if( !haveRoot )
+		{
+			// Linear fallback: L(0) = value0 and L(1) = value1
+			// => L(t) = value0 + t * (value1-value0)
+			// => L(t) = isoValue <=> t = ( isoValue - value0 ) / ( value1 - value0 )
+			if( value0==value1 ) ERROR_OUT( "Not a zero-crossing root: %g %g" , value0 , value1 );
+			rootParam = ( isoValue - value0 ) / ( value1 - value0 );
+		}
+		if( rootParam<=0 || rootParam>=1 )
+		{
+			// Warn on the boundary too, then clamp into [0,1]
+			WARN( "Bad average root: %f\t(%f %f) (%f)" , rootParam , value0 , value1 , isoValue );
+			rootParam = rootParam<0 ? 0 : ( rootParam>1 ? 1 : rootParam );
+		}
+		vertexPos[axis] = Real( edgeStart + nodeWidth*rootParam );
+		Real sampleDepth = (Real)1.;
+		Data dataValue;
+		if( densityWeights )
+		{
+			Real weight;
+			tree._getSampleDepthAndWeight( *densityWeights , node , vertexPos , weightKey , sampleDepth , weight );
+		}
+		if( data )
+		{
+			if( DataDegree==0 ) 
+			{
+				// Degree-0 data is evaluated at the node's center rather than at the vertex
+				Point< Real , 3 > center( nodeCorner[0] + nodeWidth/2 , nodeCorner[1] + nodeWidth/2 , nodeCorner[2] + nodeWidth/2 );
+				dataValue = tree.template _evaluate< ProjectiveData< Data , Real > , SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > , 0 >( *data , center , *pointEvaluator , dataKey ).value();
+			}
+			else dataValue = tree.template _evaluate< ProjectiveData< Data , Real > , SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > , 0 >( *data , vertexPos , *pointEvaluator , dataKey ).value();
+		}
+		SetVertex( vertex , vertexPos , sampleDepth , dataValue );
+		return true;
+	}
+	template< unsigned int WeightDegree , typename Data , unsigned int DataSig >
+	static bool _GetIsoVertex( const FEMTree< Dim , Real >& tree , typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , Real isoValue , ConstPointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , ConstPointSupportKey< IsotropicUIntPack< Dim , FEMSignature< DataSig >::Degree > >& dataKey , const TreeNode* node , typename HyperCube::template Cube< Dim-1 >::template Element< 0 > _c , const _SliceValues& bValues , const _SliceValues& fValues , Vertex& vertex , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex )
+	{
+		// Computes the iso-vertex on the cross edge of node through square corner _c (running from the back slice to the front slice) and hands it to SetVertex.
+		// Always returns true.
+		static const unsigned int DataDegree = FEMSignature< DataSig >::Degree;
+		// The quadratic fit is used only when both slices store corner gradients
+		const bool useHermite = bValues.cornerGradients!=NullPointer( Point< Real , Dim > ) && fValues.cornerGradients!=NullPointer( Point< Real , Dim > );
+		const typename SliceData::SquareCornerIndices& backIdx  = bValues.sliceData.cornerIndices( node );
+		const typename SliceData::SquareCornerIndices& frontIdx = fValues.sliceData.cornerIndices( node );
+		Real value0 = bValues.cornerValues[ backIdx[_c.index] ] , value1 = fValues.cornerValues[ frontIdx[_c.index] ];
+		Point< Real , Dim > vertexPos , nodeCorner;
+		Real edgeStart , nodeWidth;
+		tree._startAndWidth( node , nodeCorner , nodeWidth );
+		edgeStart = nodeCorner[2];
+		int xSide , ySide;
+		{
+			const HyperCube::Direction* cornerDirs = SliceData::template HyperCubeTables< Dim-1 , 0 >::Directions[ _c.index ];
+			xSide = cornerDirs[0]==HyperCube::BACK ? 0 : 1 , ySide = cornerDirs[1]==HyperCube::BACK ? 0 : 1;
+		}
+		// The first two coordinates are those of the square corner
+		vertexPos[0] = nodeCorner[0] + nodeWidth*xSide;
+		vertexPos[1] = nodeCorner[1] + nodeWidth*ySide;
+
+		double rootParam;
+		bool haveRoot = false;
+
+		if( useHermite )
+		{
+			double slope0 = bValues.cornerGradients[ backIdx[_c.index] ][2] * nodeWidth , slope1 = fValues.cornerGradients[ frontIdx[_c.index] ][2] * nodeWidth;
+			// The scaling will turn the Hermite Spline into a quadratic
+			double scale = (value1-value0) / ( (slope1+slope0 ) / 2 );
+			slope0 *= scale , slope1 *= scale;
+
+			// Hermite Spline
+			Polynomial< 2 > hermite;
+			hermite.coefficients[0] = value0;
+			hermite.coefficients[1] = slope0;
+			hermite.coefficients[2] = 3*(value1-value0)-slope1-2*slope0;
+			double roots[2];
+			int inRange = 0 , rootCount = hermite.getSolutions( isoValue , roots , 0 );
+			rootParam = 0;
+			// Average the roots that fall inside [0,1]
+			for( int r=0 ; r<rootCount ; r++ ) if( roots[r]>=0 && roots[r]<=1 ) { rootParam += roots[r]; inRange++; }
+			haveRoot = ( inRange!=0 );
+			rootParam /= inRange;
+		}
+		if( !haveRoot )
+		{
+			// Linear fallback: L(0) = value0 and L(1) = value1
+			// => L(t) = value0 + t * (value1-value0)
+			// => L(t) = isoValue <=> t = ( isoValue - value0 ) / ( value1 - value0 )
+			if( value0==value1 ) ERROR_OUT( "Not a zero-crossing root: %g %g" , value0 , value1 );
+			rootParam = ( isoValue - value0 ) / ( value1 - value0 );
+		}
+		if( rootParam<=0 || rootParam>=1 )
+		{
+			// Warn on the boundary too, then clamp into [0,1]
+			WARN( "Bad average root: %f\t(%f %f) (%f)" , rootParam , value0 , value1 , isoValue );
+			rootParam = rootParam<0 ? 0 : ( rootParam>1 ? 1 : rootParam );
+		}
+		vertexPos[2] = Real( edgeStart + nodeWidth*rootParam );
+		Real sampleDepth = (Real)1.;
+		Data dataValue;
+		if( densityWeights )
+		{
+			Real weight;
+			tree._getSampleDepthAndWeight( *densityWeights , node , vertexPos , weightKey , sampleDepth , weight );
+		}
+		if( data )
+		{
+			if( DataDegree==0 ) 
+			{
+				Point< Real , 3 > center( nodeCorner[0] + nodeWidth/2 , nodeCorner[1] + nodeWidth/2 , nodeCorner[2] + nodeWidth/2 );	// degree-0 data is evaluated at the node's center
+				dataValue = tree.template _evaluate< ProjectiveData< Data , Real > , SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > , 0 >( *data , center , *pointEvaluator , dataKey ).value();
+			}
+			else dataValue = tree.template _evaluate< ProjectiveData< Data , Real > , SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > , 0 >( *data , vertexPos , *pointEvaluator , dataKey ).value();
+		}
+		SetVertex( vertex , vertexPos , sampleDepth , dataValue );
+		return true;
+	}
+
+	// Adds the faces for a single iso-polygon to the mesh and returns a face count for it.
+	//   mesh          : receives the faces (and, for the barycenter fan, one extra point)
+	//   polygon       : (vertex index , vertex) pairs listed along the polygon boundary
+	//   polygonMesh   : if true the polygon is emitted as one face and 1 is returned
+	//   addBarycenter : if true, polygons with more than three vertices may be fanned around their barycenter
+	//   vOffset       : incremented once for every barycenter point added to the mesh
+	// Returns 1 for a polygon face, polygon.size() for a barycenter fan, and polygon.size()-2 otherwise.
+	// [NOTE] For polygons with fewer than three vertices nothing is added and polygon.size()-2 is still returned.
+	static int _AddIsoPolygons( CoredMeshData< Vertex >& mesh , std::vector< std::pair< int , Vertex > >& polygon , bool polygonMesh , bool addBarycenter , int& vOffset )
+	{
+		if( polygonMesh )
+		{
+			// Emit the whole polygon as one face, listing the vertex indices in reverse order
+			std::vector< int > vertices( polygon.size() );
+			for( int i=0 ; i<(int)polygon.size() ; i++ ) vertices[i] = polygon[polygon.size()-1-i].first;
+			mesh.addPolygon_s( vertices );
+			return 1;
+		}
+		if( polygon.size()>3 )
+		{
+			bool isCoplanar = false;
+			std::vector< int > triangle( 3 );
+
+			// Set the flag if any two non-adjacent vertices have an identical value in at least one coordinate
+			if( addBarycenter )
+				for( int i=0 ; i<(int)polygon.size() ; i++ )
+					for( int j=0 ; j<i ; j++ )
+						if( (i+1)%polygon.size()!=j && (j+1)%polygon.size()!=i )
+						{
+							Vertex v1 = polygon[i].second , v2 = polygon[j].second;
+							for( int k=0 ; k<3 ; k++ ) if( v1.point[k]==v2.point[k] ) isCoplanar = true;
+						}
+			if( isCoplanar )
+			{
+				// Average the polygon's vertices to get the barycenter
+				Vertex c;
+				c *= 0;
+				for( int i=0 ; i<(int)polygon.size() ; i++ ) c += polygon[i].second;
+				c /= ( typename Vertex::Real )polygon.size();
+				int cIdx;
+				// Adding the point and bumping the counter are done together inside a named critical section
+#pragma omp critical (add_barycenter_point_access)
+				{
+					cIdx = mesh.addOutOfCorePoint( c );
+					vOffset++;
+				}
+				// One triangle per polygon edge: ( vertex i , barycenter , vertex i+1 )
+				for( int i=0 ; i<(int)polygon.size() ; i++ )
+				{
+					triangle[0] = polygon[ i                  ].first;
+					triangle[1] = cIdx;
+					triangle[2] = polygon[(i+1)%polygon.size()].first;
+					mesh.addPolygon_s( triangle );
+				}
+				return (int)polygon.size();
+			}
+			else
+			{
+				// Triangulate the polygon's positions with MinimalAreaTriangulation; anything other than size-2 triangles is treated as an error
+				std::vector< Point< Real , Dim > > vertices( polygon.size() );
+				for( int i=0 ; i<(int)polygon.size() ; i++ ) vertices[i] = polygon[i].second.point;
+				std::vector< TriangleIndex > triangles = MinimalAreaTriangulation< Real , Dim >( ( ConstPointer( Point< Real , Dim > ) )GetPointer( vertices ) , vertices.size() );
+				if( triangles.size()!=polygon.size()-2 ) ERROR_OUT( "Minimal area triangulation failed: %d != %d" , (int)triangles.size() , (int)polygon.size()-2 );
+				for( int i=0 ; i<(int)triangles.size() ; i++ )
+				{
+					// The three corner indices are written in reverse order
+					for( int j=0 ; j<3 ; j++ ) triangle[2-j] = polygon[ triangles[i].idx[j] ].first;
+					mesh.addPolygon_s( triangle );
+				}
+			}
+		}
+		else if( polygon.size()==3 )
+		{
+			// A triangle is emitted directly, again with its vertex order reversed
+			std::vector< int > vertices( 3 );
+			for( int i=0 ; i<3 ; i++ ) vertices[2-i] = polygon[i].first;
+			mesh.addPolygon_s( vertices );
+		}
+		return (int)polygon.size()-2;
+	}
+public:
+	// Accumulated wall-clock time (in seconds) spent in each stage of the iso-surface extraction
+	struct IsoStats
+	{
+		double cornersTime , verticesTime , edgesTime , surfaceTime;
+		double copyFinerTime , setTableTime;
+
+		// All timers start at zero
+		IsoStats( void )
+		{
+			cornersTime = verticesTime = edgesTime = surfaceTime = 0;
+			copyFinerTime = setTableTime = 0;
+		}
+
+		// Formats the timers on a single line, with one digit after the decimal point.
+		// The values are listed in the order given by the label, which differs from the declaration order.
+		std::string toString( void ) const
+		{
+			const double timings[] = { cornersTime , verticesTime , edgesTime , surfaceTime , setTableTime , copyFinerTime };
+			const int timingCount = (int)( sizeof(timings) / sizeof(timings[0]) );
+
+			std::stringstream out;
+			out << "Corners / Vertices / Edges / Surface / Set Table / Copy Finer: ";
+			out << std::fixed << std::setprecision(1);
+			for( int t=0 ; t<timingCount ; t++ )
+			{
+				if( t ) out << " / ";
+				out << timings[t];
+			}
+			out << " (s)";
+			return out.str();
+		}
+	};
+	// Extracts the level set at isoValue of the function described by `coefficients` and writes it to `mesh`.
+	// The tree is processed one finest-level slice at a time; at each slice every depth that shares the slice boundary is processed too.
+	//   tree            : the tree over which the function is defined
+	//   densityWeights  : forwarded to the vertex-setting routines
+	//   data            : may be NULL; a point evaluator is only allocated when it is not
+	//   coefficients    : per-node solution coefficients
+	//   SetVertex       : callback forwarded to the vertex-setting routines
+	//   nonLinearFit    : switched off (with a warning) if any of the FEM signatures has degree 0
+	//   addBarycenter , polygonMesh , flipOrientation : forwarded to _SetIsoSurface
+	// Returns the time accumulated in each stage.
+	template< typename Data , unsigned int ... FEMSigs , unsigned int WeightDegree , unsigned int DataSig >
+	static IsoStats Extract( UIntPack< FEMSigs ... > , UIntPack< WeightDegree > , UIntPack< DataSig > , const FEMTree< Dim , Real >& tree , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data , const DenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients , Real isoValue , CoredMeshData< Vertex >& mesh , std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex , bool nonLinearFit , bool addBarycenter , bool polygonMesh , bool flipOrientation )
+	{
+		IsoStats isoStats;
+		static_assert( sizeof...(FEMSigs)==Dim , "[ERROR] Number of signatures should match dimension" );
+		tree._setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+		static const unsigned int DataDegree = FEMSignature< DataSig >::Degree;
+		static const int FEMDegrees[] = { FEMSignature< FEMSigs >::Degree ... };
+		for( int d=0 ; d<Dim ; d++ ) if( FEMDegrees[d]==0 && nonLinearFit ) WARN( "Constant B-Splines do not support non-linear interpolation" ) , nonLinearFit = false;
+
+		SliceData::SetHyperCubeTables();
+
+		typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >* pointEvaluator = NULL;
+		if( data ) pointEvaluator = new typename FEMIntegrator::template PointEvaluator< IsotropicUIntPack< Dim , DataSig > , ZeroUIntPack< Dim > >( tree._maxDepth );
+		// Copy the coefficients of all nodes above the finest depth and up-sample them one depth at a time
+		DenseNodeData< Real , UIntPack< FEMSigs ... > > coarseCoefficients( tree._sNodesEnd( tree._maxDepth-1 ) );
+		memset( coarseCoefficients() , 0 , sizeof(Real)*tree._sNodesEnd( tree._maxDepth-1 ) );
+#pragma omp parallel for
+		for( int i=tree._sNodesBegin(0) ; i<tree._sNodesEnd( tree._maxDepth-1 ) ; i++ ) coarseCoefficients[i] = coefficients[i];
+		typename FEMIntegrator::template RestrictionProlongation< UIntPack< FEMSigs ... > > rp;
+		for( LocalDepth d=1 ; d<tree._maxDepth ; d++ ) tree._upSample( UIntPack< FEMSigs ... >() , rp , d , coarseCoefficients() );
+		FEMTree< Dim , Real >::MemoryUsage();
+
+		// One evaluator per depth, each set up with the maximum depth
+		std::vector< _Evaluator< UIntPack< FEMSigs ... > , 1 > > evaluators( tree._maxDepth+1 );
+		for( LocalDepth d=0 ; d<=tree._maxDepth ; d++ ) evaluators[d].set( tree._maxDepth );
+
+		// Running counter passed by reference to the vertex-setting and surface routines
+		int vertexOffset = 0;
+
+		// One set of slab values per depth
+		std::vector< _SlabValues > slabValues( tree._maxDepth+1 );
+
+		// Initialize the back slice
+		for( LocalDepth d=tree._maxDepth ; d>=0 ; d-- )
+		{
+			double t = Time();
+			SliceData::SetSliceTableData( tree._sNodes , &slabValues[d].sliceValues(0).sliceData , &slabValues[d].xSliceValues(0).xSliceData , &slabValues[d].sliceValues(1).sliceData , tree._localToGlobal( d ) , tree._localInset( d ) );
+			isoStats.setTableTime += Time()-t;
+			slabValues[d].sliceValues (0).reset( nonLinearFit );
+			slabValues[d].sliceValues (1).reset( nonLinearFit );
+			slabValues[d].xSliceValues(0).reset( );
+		}
+		// Process slice 0 at every depth, from the finest to the coarsest
+		for( LocalDepth d=tree._maxDepth ; d>=0 ; d-- )
+		{
+			// Copy edges from finer
+			double t = Time();
+			if( d<tree._maxDepth ) _CopyFinerSliceIsoEdgeKeys( tree , d , 0 , slabValues );
+			isoStats.copyFinerTime += Time()-t , t = Time();
+			_SetSliceIsoCorners< FEMSigs ... >( tree , coefficients() , coarseCoefficients() , isoValue , d , 0 , slabValues , evaluators[d] );
+			isoStats.cornersTime += Time()-t , t = Time();
+			_SetSliceIsoVertices< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , d , 0 , vertexOffset , mesh , slabValues , SetVertex );
+			isoStats.verticesTime += Time()-t , t = Time();
+			_SetSliceIsoEdges( tree , d , 0 , slabValues );
+			isoStats.edgesTime += Time()-t , t = Time();
+		}
+
+		// Iterate over the slices at the finest level
+		for( int slice=0 ; slice<( 1<<tree._maxDepth ) ; slice++ )
+		{
+			// Process at all depths that contain this slice
+			// ( o is the index of the slice at depth d; it is halved each time we move to the next coarser depth )
+			LocalDepth d ; int o;
+			for( d=tree._maxDepth , o=slice+1 ; d>=0 ; d-- , o>>=1 )
+			{
+				// Copy edges from finer (required to ensure we correctly track edge cancellations)
+				double t = Time();
+				if( d<tree._maxDepth )
+				{
+					_CopyFinerSliceIsoEdgeKeys( tree , d , o , slabValues );
+					_CopyFinerXSliceIsoEdgeKeys( tree , d , o-1 , slabValues );
+				}
+				isoStats.copyFinerTime += Time()-t , t = Time();
+				// Set the slice values/vertices
+				_SetSliceIsoCorners< FEMSigs ... >( tree , coefficients() , coarseCoefficients() , isoValue , d , o , slabValues , evaluators[d] );
+				isoStats.cornersTime += Time()-t , t = Time();
+				_SetSliceIsoVertices< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , d , o , vertexOffset , mesh , slabValues , SetVertex );
+				isoStats.verticesTime += Time()-t , t = Time();
+				_SetSliceIsoEdges( tree , d , o , slabValues );
+				isoStats.edgesTime += Time()-t , t = Time();
+
+				// Set the cross-slice edges
+				_SetXSliceIsoVertices< WeightDegree , Data , DataSig >( tree , pointEvaluator , densityWeights , data , isoValue , d , o-1 , vertexOffset , mesh , slabValues , SetVertex );
+				isoStats.verticesTime += Time()-t , t = Time();
+				_SetXSliceIsoEdges( tree , d , o-1 , slabValues );
+				isoStats.edgesTime += Time()-t , t = Time();
+
+				// Build the three maps for the back slice, the front slice and the cross-slice, each call in its own OpenMP section
+#pragma omp parallel sections
+				{
+#pragma omp section
+					slabValues[d]. sliceValues(o-1).setEdgeVertexMap();
+#pragma omp section
+					slabValues[d]. sliceValues(o  ).setEdgeVertexMap();
+#pragma omp section
+					slabValues[d].xSliceValues(o-1).setEdgeVertexMap();
+#pragma omp section
+					slabValues[d]. sliceValues(o-1).setVertexPairMap();
+#pragma omp section
+					slabValues[d]. sliceValues(o  ).setVertexPairMap();
+#pragma omp section
+					slabValues[d].xSliceValues(o-1).setVertexPairMap();
+#pragma omp section
+					slabValues[d]. sliceValues(o-1).setFaceEdgeMap();
+#pragma omp section
+					slabValues[d]. sliceValues(o  ).setFaceEdgeMap();
+#pragma omp section
+					slabValues[d].xSliceValues(o-1).setFaceEdgeMap();
+				}
+				// Add the triangles
+				t = Time();
+				_SetIsoSurface( tree , d , o-1 , slabValues[d].sliceValues(o-1) , slabValues[d].sliceValues(o) , slabValues[d].xSliceValues(o-1) , mesh , polygonMesh , addBarycenter , vertexOffset , flipOrientation );
+				isoStats.surfaceTime += Time()-t;
+
+				// Stop moving to coarser depths once the slice index at this depth is odd
+				if( o&1 ) break;
+			}
+
+			// Second pass over the same depths: set up the slice tables that the next finest-level slice will need
+			for( d=tree._maxDepth , o=slice+1 ; d>=0 ; d-- , o>>=1 )
+			{
+				// Initialize for the next pass
+				if( o<(1<<(d+1)) )
+				{
+					double t = Time();
+					SliceData::SetSliceTableData( tree._sNodes , NULL , &slabValues[d].xSliceValues(o).xSliceData , &slabValues[d].sliceValues(o+1).sliceData , tree._localToGlobal( d ) , o + tree._localInset( d ) );
+					isoStats.setTableTime += Time()-t;
+					slabValues[d].sliceValues(o+1).reset( nonLinearFit );
+					slabValues[d].xSliceValues(o).reset();
+				}
+				if( o&1 ) break;
+			}
+		}
+		FEMTree< Dim , Real >::MemoryUsage();
+		if( pointEvaluator ) delete pointEvaluator;
+		return isoStats;
+	}
+};
+
+// Out-of-class definitions of the static lookup tables of IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables.
+// No initializers are given here; presumably the tables are filled by SliceData::SetHyperCubeTables() -- confirm.
+template< class Real , class Vertex > template< unsigned int D , unsigned int K >
+unsigned int IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K >::CellOffset[ HyperCube::Cube< D >::template ElementNum< K >() ][ HyperCube::Cube< D >::template IncidentCubeNum< K >() ];
+template< class Real , class Vertex > template< unsigned int D , unsigned int K >
+unsigned int IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K >::IncidentElementCoIndex[ HyperCube::Cube< D >::template ElementNum< K >() ][ HyperCube::Cube< D >::template IncidentCubeNum< K >() ];
+template< class Real , class Vertex > template< unsigned int D , unsigned int K >
+unsigned int IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K >::CellOffsetAntipodal[ HyperCube::Cube< D >::template ElementNum< K >() ];
+template< class Real , class Vertex > template< unsigned int D , unsigned int K >
+typename HyperCube::Cube< D >::template IncidentCubeIndex < K > IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K >::IncidentCube[ HyperCube::Cube< D >::template ElementNum< K >() ];
+template< class Real , class Vertex > template< unsigned int D , unsigned int K >
+typename HyperCube::Direction IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K >::Directions[ HyperCube::Cube< D >::template ElementNum< K >() ][ D ];
+// Tables relating elements of dimension K1 to elements of dimension K2
+template< class Real , class Vertex > template< unsigned int D , unsigned int K1 , unsigned int K2 >
+typename HyperCube::Cube< D >::template Element< K2 > IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K1 , K2 >::OverlapElements[ HyperCube::Cube< D >::template ElementNum< K1 >() ][ HyperCube::Cube< D >::template OverlapElementNum< K1 , K2 >() ];
+template< class Real , class Vertex > template< unsigned int D , unsigned int K1 , unsigned int K2 >
+bool IsoSurfaceExtractor< 3 , Real , Vertex >::SliceData::HyperCubeTables< D , K1 , K2 >::Overlap[ HyperCube::Cube< D >::template ElementNum< K1 >() ][ HyperCube::Cube< D >::template ElementNum< K2 >() ];
diff --git a/Src/FEMTree.SortedTreeNodes.inl b/Src/FEMTree.SortedTreeNodes.inl
new file mode 100644
index 0000000..37e7307
--- /dev/null
+++ b/Src/FEMTree.SortedTreeNodes.inl
@@ -0,0 +1,115 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+/////////////////////
+// SortedTreeNodes //
+/////////////////////
+// Constructs an empty container: no levels, no node array and no slice tables.
+template< unsigned int Dim >
+SortedTreeNodes< Dim >::SortedTreeNodes( void )
+{
+	_levels = 0;
+	treeNodes = NullPointer( TreeNode* );
+	_sliceStart = NullPointer( Pointer( int ) );
+}
+// Releases the per-level slice tables, the table of tables, and the node array.
+template< unsigned int Dim >
+SortedTreeNodes< Dim >::~SortedTreeNodes( void )
+{
+	if( _sliceStart )
+	{
+		// One slice table was allocated per level
+		for( int level=0 ; level<_levels ; level++ )
+		{
+			FreePointer( _sliceStart[level] );
+		}
+	}
+	FreePointer( _sliceStart );
+	DeletePointer( treeNodes );
+}
+// Sorts the nodes under root and re-indexes them by their sorted position.
+// If map is given, it is resized (new entries set to -1) and (*map)[ old index ] is set to the new index for every sorted node that had a non-negative index.
+template< unsigned int Dim >
+void SortedTreeNodes< Dim >::set( TreeNode& root , std::vector< int >* map )
+{
+	const size_t oldIndexBound = set( root );
+	// The end of the last slice at the deepest level is the total number of sorted nodes
+	const int nodeCount = _sliceStart[_levels-1][(size_t)1<<(_levels-1)];
+
+	if( map )
+	{
+		map->resize( oldIndexBound , -1 );
+		for( int n=0 ; n<nodeCount ; n++ )
+		{
+			const auto oldIndex = treeNodes[n]->nodeData.nodeIndex;
+			if( oldIndex>=0 ) (*map)[ oldIndex ] = n;
+		}
+	}
+
+	// Only overwrite the stored indices once the old ones have been recorded
+	for( int n=0 ; n<nodeCount ; n++ ) treeNodes[n]->nodeData.nodeIndex = n;
+}
+// Rebuilds the sorted node array and the per-level slice tables for the tree under root.
+// Nodes are ordered by depth and, within a depth, by their offset along the last dimension; ghost nodes are left out.
+// After the call _sliceStart[l][s] is the index in treeNodes of the first node of slice s at level l, and
+// _sliceStart[l][1<<l] is one past the last node of level l.
+// Returns one more than the largest non-negative nodeIndex found on any node (ghost or not), or 0 if there is none.
+template< unsigned int Dim >
+size_t SortedTreeNodes< Dim >::set( TreeNode& root )
+{
+	size_t sz = 0;
+
+	// Release whatever a previous call allocated. The loop has to run over the level count those tables
+	// were allocated with, so this must happen before _levels is overwritten with the new depth.
+	if( _sliceStart ) for( int d=0 ; d<_levels ; d++ ) FreePointer( _sliceStart[d] );
+	FreePointer( _sliceStart );
+	DeletePointer( treeNodes );
+
+	_levels = root.maxDepth()+1;
+
+	// Level l has (1<<l) slices; one extra entry holds the end of the last slice
+	_sliceStart = AllocPointer< Pointer( int ) >( _levels );
+	for( int l=0 ; l<_levels ; l++ )
+	{
+		_sliceStart[l] = AllocPointer< int >( ((size_t)1<<l)+1 );
+		memset( _sliceStart[l] , 0 , sizeof(int)*( ((size_t)1<<l)+1 ) );
+	}
+
+	// Count the number of nodes in each slice
+	// (the count for slice s is stored at s+1 so that the prefix sum below yields start offsets)
+	for( TreeNode* node = root.nextNode() ; node ; node = root.nextNode( node ) )
+	{
+		if( node->nodeData.nodeIndex>=0 ) sz = std::max< size_t >( node->nodeData.nodeIndex+1 , sz );
+		if( !GetGhostFlag< Dim >( node ) )
+		{
+			int d , off[Dim];
+			node->depthAndOffset( d , off );
+			_sliceStart[d][ off[Dim-1]+1 ]++;
+		}
+	}
+
+	// Get the start index for each slice
+	{
+		int levelOffset = 0;
+		for( int l=0 ; l<_levels ; l++ )
+		{
+			_sliceStart[l][0] = levelOffset;
+			for( int s=0 ; s<((size_t)1<<l); s++ ) _sliceStart[l][s+1] += _sliceStart[l][s];
+			levelOffset = _sliceStart[l][(size_t)1<<l];
+		}
+	}
+	// Allocate memory for the tree nodes
+	treeNodes = NewPointer< TreeNode* >( _sliceStart[_levels-1][(size_t)1<<(_levels-1)] );
+
+	// Add the tree nodes
+	// (each slice's start entry is used as a write cursor and so ends up at the slice's end)
+	for( TreeNode* node=root.nextNode() ; node ; node=root.nextNode( node ) ) if( !GetGhostFlag< Dim >( node ) )
+	{
+		int d , off[Dim];
+		node->depthAndOffset( d , off );
+		treeNodes[ _sliceStart[d][ off[Dim-1] ]++ ] = node;
+	}
+
+	// Shift the slice offsets up since we incremented as we added
+	for( int l=0 ; l<_levels ; l++ )
+	{
+		for( int s=(1<<l) ; s>0 ; s-- ) _sliceStart[l][s] = _sliceStart[l][s-1];
+		_sliceStart[l][0] = l>0 ? _sliceStart[l-1][(size_t)1<<(l-1)] : 0;
+	}
+	return sz;
+}
diff --git a/Src/FEMTree.System.inl b/Src/FEMTree.System.inl
new file mode 100644
index 0000000..316596e
--- /dev/null
+++ b/Src/FEMTree.System.inl
@@ -0,0 +1,3226 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+///////////////////////////////////
+// BaseFEMIntegrator::Constraint //
+///////////////////////////////////
+// Fills the same-depth stencil: one side is held at the center of the grid at depth _highDepth while the other
+// sweeps the overlap window, and every stencil entry is set to ccIntegrate of the two offsets.
+// IterateFirst selects which side sweeps: the FEM offset if true, the constraint offset if false.
+template< unsigned int ... TDegrees , unsigned int ... CDegrees , unsigned int CDim >
+template< bool IterateFirst >
+void BaseFEMIntegrator::Constraint< UIntPack< TDegrees ... > , UIntPack< CDegrees ... > , CDim >::setStencil( CCStencil & stencil ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineOverlapSizes< TDegrees , CDegrees >::OverlapSize ... > OverlapSizes;
+	static const int overlapStart[] = { ( IterateFirst ? BSplineOverlapSizes< CDegrees , TDegrees >::OverlapStart : BSplineOverlapSizes< TDegrees , CDegrees >::OverlapStart ) ... };
+	const int center = ( 1<<_highDepth )>>1;
+	int femOffset[Dim] , cOffset[Dim];
+
+	int* fixedOffset = IterateFirst ? cOffset : femOffset;
+	int* sweptOffset = IterateFirst ? femOffset : cOffset;
+	for( int d=0 ; d<Dim ; d++ ) fixedOffset[d] = center;
+
+	auto updateState = [&]( int d , int i ){ sweptOffset[d] = i + center + overlapStart[d]; };
+	auto setEntry    = [&]( Point< double , CDim >& p ){ p = ccIntegrate( femOffset , cOffset ); };
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , OverlapSizes() , updateState , setEntry , stencil() );
+}
+// Fills one stencil per position of the outer [0,2)^Dim window, with entries computed by pcIntegrate.
+// The outer loop sets fineCenter (at the depth of `center`); the inner loop fills that position's stencil over the overlap window.
+// When IterateFirst is true the FEM offset sweeps around center/2 and the constraint offset stays at fineCenter;
+// otherwise the FEM offset stays at center/2 and the constraint offset sweeps around fineCenter.
+// NOTE(review): assumes WindowLoop::Run calls the update functor with ( dimension , index ) before visiting the entries -- confirm.
+template< unsigned int ... TDegrees , unsigned int ... CDegrees , unsigned int CDim >
+template< bool IterateFirst >
+void BaseFEMIntegrator::Constraint< UIntPack< TDegrees ... > , UIntPack< CDegrees ... > , CDim >::setStencils( PCStencils& stencils ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineOverlapSizes< TDegrees, CDegrees >::OverlapSize ... > OverlapSizes;
+	// [NOTE] We want the center to be at the first node of the brood, which is not the case when childDepth is 1.
+	// ( the second statement rounds the center down to an even value )
+	int center = ( 1<<_highDepth )>>1 ; center = ( center>>1 )<<1;	
+	int fineCenter[Dim] , femOffset[Dim] , cOffset[Dim];
+	static const int overlapStart[] = { ( IterateFirst ? BSplineOverlapSizes< CDegrees , TDegrees >::OverlapStart : BSplineOverlapSizes< TDegrees , CDegrees >::OverlapStart ) ... };
+	// The outer state is written with the dimension index reversed ( Dim-d-1 ), unlike the inner state
+	std::function< void ( int , int )               > outerUpdateState = [&]( int d , int i ){ fineCenter[Dim-d-1] = i+center; };
+	std::function< void ( Point< double , CDim >& ) > innerFunction    = [&]( Point< double , CDim >& p ){ p = pcIntegrate( femOffset , cOffset ); };
+	std::function< void ( int , int )               > innerUpdateState = [&]( int d , int i ){ femOffset[d] = IterateFirst ? (i+center/2+overlapStart[d]) : center/2 , cOffset[d] = IterateFirst ? fineCenter[d] : (i+fineCenter[d]+overlapStart[d]); };
+	std::function< void ( CCStencil& )              > outerFunction    = [&]( CCStencil& s )
+	{
+		WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , OverlapSizes() , innerUpdateState , innerFunction , s() );
+	};
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , 2 >() , outerUpdateState , outerFunction , stencils() );
+}
+// Fills one stencil per position of the outer [0,2)^Dim window, with entries computed by cpIntegrate.
+// The outer loop sets fineCenter (at the depth of `center`); the inner loop fills that position's stencil over the overlap window.
+// When IterateFirst is true the FEM offset sweeps around fineCenter and the constraint offset stays at center/2;
+// otherwise the FEM offset stays at fineCenter and the constraint offset sweeps around center/2.
+// NOTE(review): assumes WindowLoop::Run calls the update functor with ( dimension , index ) before visiting the entries -- confirm.
+template< unsigned int ... TDegrees , unsigned int ... CDegrees , unsigned int CDim >
+template< bool IterateFirst >
+void BaseFEMIntegrator::Constraint< UIntPack< TDegrees ... > , UIntPack< CDegrees ... > , CDim >::setStencils( CPStencils& stencils ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineOverlapSizes< TDegrees , CDegrees >::OverlapSize ... > OverlapSizes;
+	// [NOTE] We want the center to be at the first node of the brood, which is not the case when childDepth is 1.
+	// ( the second statement rounds the center down to an even value )
+	int center = ( 1<<_highDepth )>>1 ; center = ( center>>1 )<<1;
+	static const int overlapStart[] = { ( IterateFirst ? BSplineOverlapSizes< CDegrees , TDegrees >::OverlapStart : BSplineOverlapSizes< TDegrees , CDegrees >::OverlapStart ) ... };
+	int fineCenter[Dim] , femOffset[Dim] , cOffset[Dim];
+	// The outer state is written with the dimension index reversed ( Dim-d-1 ), unlike the inner state
+	std::function< void ( int , int )               > outerUpdateState = [&]( int d , int i ){ fineCenter[Dim-d-1] = i+center; };
+	std::function< void ( Point< double , CDim >& ) > innerFunction    = [&]( Point< double , CDim >& p ){ p = cpIntegrate( femOffset , cOffset ); };
+	std::function< void ( int , int )               > innerUpdateState = [&]( int d , int i ){ femOffset[d] = IterateFirst ? (i+fineCenter[d]+overlapStart[d]) : fineCenter[d] , cOffset[d] = IterateFirst ? center/2 : (i+center/2+overlapStart[d]); };
+	std::function< void ( CCStencil& )              > outerFunction    = [&]( CCStencil& s )
+	{
+		WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , OverlapSizes() , innerUpdateState , innerFunction , s() );
+	};
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , 2 >() , outerUpdateState , outerFunction , stencils() );
+}
+
+///////////////////////////////
+// BaseFEMIntegrator::System //
+///////////////////////////////
+// Fills the same-depth system stencil: one offset is held at the center of the grid at depth _highDepth while the
+// other sweeps the overlap window, and every stencil entry is set to ccIntegrate( offset1 , offset2 ).
+// IterateFirst selects which offset sweeps: the first if true, the second if false.
+template< unsigned int ... TDegrees >
+template< bool IterateFirst >
+void BaseFEMIntegrator::System< UIntPack< TDegrees ... > >::setStencil( CCStencil & stencil ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineOverlapSizes< TDegrees , TDegrees >::OverlapSize ... > OverlapSizes;
+	static const int overlapStart[] = { BSplineOverlapSizes< TDegrees , TDegrees >::OverlapStart ... };
+	const int center = ( 1<<_highDepth )>>1;
+	int offset1[Dim] , offset2[Dim];
+
+	int* fixedOffset = IterateFirst ? offset2 : offset1;
+	int* sweptOffset = IterateFirst ? offset1 : offset2;
+	for( int d=0 ; d<Dim ; d++ ) fixedOffset[d] = center;
+
+	auto updateState = [&]( int d , int i ){ sweptOffset[d] = i + center + overlapStart[d]; };
+	auto setEntry    = [&]( double& v ){ v = ccIntegrate( offset1 , offset2 ); };
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , OverlapSizes() , updateState , setEntry , stencil() );
+}
+// Fills one stencil per position of the outer [0,2)^Dim window, with entries computed by pcIntegrate.
+// The outer loop sets fineCenter (at the depth of `center`); the inner loop fills that position's stencil over the overlap window.
+// When IterateFirst is true the first offset sweeps around center/2 and the second stays at fineCenter;
+// otherwise the first stays at center/2 and the second sweeps around fineCenter.
+// NOTE(review): assumes WindowLoop::Run calls the update functor with ( dimension , index ) before visiting the entries -- confirm.
+template< unsigned int ... TDegrees >
+template< bool IterateFirst >
+void BaseFEMIntegrator::System< UIntPack< TDegrees ... > >::setStencils( PCStencils& stencils ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineOverlapSizes< TDegrees , TDegrees >::OverlapSize ... > OverlapSizes;
+	// [NOTE] We want the center to be at the first node of the brood
+	// Which is not the case when childDepth is 1.
+	// ( the second statement rounds the center down to an even value )
+	int center = ( 1<<_highDepth )>>1 ; center = ( center>>1 )<<1;
+	static const int overlapStart[] = { BSplineOverlapSizes< TDegrees , TDegrees >::OverlapStart ... };
+	int fineCenter[Dim] , offset1[Dim] , offset2[Dim];
+	// The outer state is written with the dimension index reversed ( Dim-d-1 ), unlike the inner state
+	std::function< void ( int , int )  > outerUpdateState = [&]( int d , int i ){ fineCenter[Dim-d-1] = i+center; };
+	std::function< void ( double& )    > innerFunction    = [&]( double& v ){ v = pcIntegrate( offset1 , offset2 ); };
+	std::function< void ( int , int )  > innerUpdateState = [&]( int d , int i ){ offset1[d] = IterateFirst ? (i+center/2+overlapStart[d]) : center/2 , offset2[d] = IterateFirst ? fineCenter[d] : (i+fineCenter[d]+overlapStart[d]); };
+	std::function< void ( CCStencil& ) > outerFunction    = [&]( CCStencil& s )
+	{
+		WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , OverlapSizes() , innerUpdateState , innerFunction , s() );
+	};
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , 2 >() , outerUpdateState , outerFunction , stencils() );
+}
+/////////////////////////////////
+// BaseFEMIntegrator::UpSample //
+/////////////////////////////////
+// Fills the up-sampling stencil: the parent offset is held at half the center of the grid at depth _highDepth while
+// the child offset sweeps the up-sample window, and every entry is set to upSampleCoefficient( parent , child ).
+template< unsigned int ... TDegrees >
+void BaseFEMIntegrator::RestrictionProlongation< UIntPack< TDegrees ... > >::setStencil( UpSampleStencil & stencil ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	typedef UIntPack< BSplineSupportSizes< TDegrees >::UpSampleSize ... > UpSampleSizes;
+	static const int upSampleStart[] = { BSplineSupportSizes< TDegrees >::UpSampleStart ... };
+	const int highCenter = ( 1<<_highDepth )>>1;
+	int pOff[Dim] , cOff[Dim];
+
+	for( int d=0 ; d<Dim ; d++ ) pOff[d] = highCenter/2;
+
+	auto updateState = [&]( int d , int i ){ cOff[d] = i + highCenter + upSampleStart[d]; };
+	auto setEntry    = [&]( double& v ){ v = upSampleCoefficient( pOff , cOff ); };
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , UpSampleSizes() , updateState , setEntry , stencil() );
+}
+// Fills one down-sampling stencil per position of the outer [0,2)^Dim window.
+// The outer loop sets the child offset cOff; the inner loop sweeps the parent offset pOff around cOff/2 and sets each
+// entry to upSampleCoefficient( pOff , cOff ).
+// NOTE(review): assumes WindowLoop::Run calls the update functor with ( dimension , index ) before visiting the entries -- confirm.
+template< unsigned int ... TDegrees >
+void BaseFEMIntegrator::RestrictionProlongation< UIntPack< TDegrees ... > >::setStencils( DownSampleStencils& stencils ) const
+{
+	static const int Dim = sizeof ... ( TDegrees );
+	// [NOTE] We want the center to be at the first node of the brood, which is not the case when childDepth is 1.
+	// ( the second statement rounds the center down to an even value )
+	int highCenter = ( 1<<_highDepth )>>1 ; highCenter = ( highCenter>>1 )<<1;	
+	int pOff[Dim] , cOff[Dim];
+	static const int offsets[] = { BSplineSupportSizes< TDegrees >::DownSample0Start ... };
+	std::function< void ( double& )            > innerFunction    = [&]( double& v ){ v = upSampleCoefficient( pOff , cOff ); };
+	std::function< void ( int , int )          > innerUpdateState = [&]( int d , int i ){ pOff[d] = cOff[d]/2 + i + offsets[d]; };
+	// The outer state is written with the dimension index reversed ( Dim-d-1 ), unlike the inner state
+	std::function< void ( int , int )          > outerUpdateState = [&]( int d , int i ){ cOff[Dim-d-1] = i+highCenter; };
+	std::function< void ( DownSampleStencil& ) > outerFunction    = [&]( DownSampleStencil& s )
+	{
+		// The inner window size per dimension is DownSample1End - DownSample0Start + 1
+		WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , UIntPack< ( - BSplineSupportSizes< TDegrees >::DownSample0Start + BSplineSupportSizes< TDegrees >::DownSample1End + 1 ) ... >() , innerUpdateState , innerFunction , s() );
+	};
+	WindowLoop< Dim >::Run( IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , 2 >() , outerUpdateState , outerFunction , stencils() );
+}
+
+///////////////////////////////
+// FEMIntegrator::Constraint //
+///////////////////////////////
+
+// Sums the contributions of all weighted derivative pairs into a CDim-dimensional result.
+// For each entry of _weightedIndices the scalar integral of the pair is computed once and then added, scaled by the
+// stored weight, to every output component listed for that entry.
+// NOTE(review): relies on Point's default constructor producing a zero vector -- confirm.
+template< unsigned int ... TSignatures , unsigned int ... TDerivatives , unsigned int ... CSignatures , unsigned int ... CDerivatives , unsigned int CDim >
+Point< double , CDim > FEMIntegrator::Constraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > , UIntPack< CSignatures ... > , UIntPack< CDerivatives ... > , CDim >::_integrate( IntegrationType iType , const int off1[] , const int off2[] ) const
+{
+	Point< double , CDim > integral;
+	for( unsigned int i=0 ; i<_weightedIndices.size() ; i++ )
+	{
+		const _WeightedIndices& entry = _weightedIndices[i];
+
+		// Per-dimension derivative arrays, presumably filled from the entry's d1 / d2 values by the two calls below
+		unsigned int tDerivatives[Dim] , cDerivatives[Dim];
+		TFactorDerivatives( entry.d1 , tDerivatives );
+		CFactorDerivatives( entry.d2 , cDerivatives );
+
+		const double pairIntegral = _integral( iType , off1 , off2 , tDerivatives , cDerivatives );
+		for( unsigned int j=0 ; j<entry.indices.size() ; j++ )
+		{
+			integral[ entry.indices[j].first ] += entry.indices[j].second * pairIntegral;
+		}
+	}
+	return integral;
+}
+
+// MOD( a , b ): a modulo b, mapped into [0,b) for positive b even when a is zero or negative.
+// [NOTE] Both arguments are expanded more than once, so they should not have side effects.
+#ifndef MOD
+#define MOD( a , b ) ( (a)>0 ? (a) % (b) : ( (b) - ( -(a) % (b) ) ) % (b) )
+#endif // MOD
+
+/////////////
+// FEMTree //
+/////////////
+// Computes the multi-color index lists for all sorted nodes at the given depth.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::setMultiColorIndices( UIntPack< FEMSigs ... > , int depth , std::vector< std::vector< int > >& indices ) const
+{
+	// Range of sorted-node indices covered by this depth
+	const int depthBegin = _sNodesBegin(depth);
+	const int depthEnd   = _sNodesEnd(depth);
+	_setMultiColorIndices( UIntPack< FEMSigs ... >() , depthBegin , depthEnd , indices );
+}
+// Partitions the valid FEM nodes with sorted index in [start,end) into color classes.
+// A node's color is obtained from its local offset taken modulo Moduli::Values in each dimension, the per-dimension
+// remainders being combined into a single index in [0,Colors).
+//   start , end : range of indices into _sNodes.treeNodes
+//   indices     : on return holds Colors lists; list c contains ( i - start ) for every valid node i of color c,
+//                 in increasing order. Any previous contents are discarded.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_setMultiColorIndices( UIntPack< FEMSigs ... > , int start , int end , std::vector< std::vector< int > >& indices ) const
+{
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typedef UIntPack< ( 1 - BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > Moduli;
+	static const unsigned int Colors = WindowSize< Moduli >::Size;
+	indices.resize( Colors );
+	int count[ Colors ];
+	memset( count , 0 , sizeof(count) );
+	// Maps a node to its color index
+	auto MCIndex = [&] ( const FEMTreeNode* node )
+	{
+		LocalDepth d ; LocalOffset off ; _localDepthAndOffset( node , d , off );
+		int index = 0;
+		for( int dd=0 ; dd<Dim ; dd++ ) index = index * Moduli::Values[Dim-dd-1] + MOD( off[Dim-dd-1] , Moduli::Values[Dim-dd-1] );
+		return index;
+	};
+	// First pass (parallel): count the nodes of each color so the lists can be sized up front
+#pragma omp parallel for
+	for( int i=start ; i<end ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		int idx = MCIndex( _sNodes.treeNodes[i] );
+#pragma omp atomic
+		count[idx]++;
+	}
+
+	// Empty each list before reserving: resize() keeps the lists of a re-used argument, and the
+	// push_back calls below would otherwise append after stale entries.
+	for( unsigned int c=0 ; c<Colors ; c++ ) indices[c].clear() , indices[c].reserve( count[c] );
+
+	// Second pass (serial, so that each list is ordered): record the range-relative index of every valid node
+	for( int i=start ; i<end ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		int idx = MCIndex( _sNodes.treeNodes[i] );
+		indices[idx].push_back( i - start );
+	}
+}
+
+// Runs `iters` Gauss-Seidel passes over every node of level `depth` in a single system.
+// The constraints are first corrected by the prolongation constraints produced while building the matrix,
+// and the diagonal is scaled by the per-node SOR weights.
+// Timings go to stats.systemTime / stats.solveTime; if computeNorms is set the squared constraint norm and the
+// squared residual norms before / after relaxation go to stats.bNorm2 / stats.inRNorm2 / stats.outRNorm2.
+// Returns iters.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , typename SORWeights , unsigned int ... PointDs >
+int FEMTree< Dim , Real >::_solveFullSystemGS( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , SORWeights sorWeights , _SolverStats& stats , bool computeNorms , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > Degrees;
+	typedef SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > SystemMatrix;
+
+	stats.systemTime = stats.solveTime = 0.;
+
+	CCStencil< Degrees > ccStencil;
+	PCStencils< Degrees > pcStencils;
+	F.template setStencil< false >( ccStencil );
+	F.template setStencils< true >( pcStencils );
+
+	double bNorm=0 , inRNorm=0 , outRNorm=0;
+	if( depth>=0 )
+	{
+		SystemMatrix M;
+		const double systemStart = Time();
+		const int levelBegin = _sNodesBegin( depth ) , levelEnd = _sNodesEnd( depth );
+
+		// Build the matrix, its diagonal and the prolongation constraints for the whole level
+		Pointer( Real ) D = AllocPointer< Real >( levelEnd - levelBegin );
+		Pointer( T ) adjusted = AllocPointer< T >( _sNodesSize( depth ) );
+		_getSliceMatrixAndProlongationConstraints( UIntPack< FEMSigs ... >() , F , M , D , bsData , depth , levelBegin , levelEnd , prolongedSolution , adjusted , ccStencil , pcStencils , interpolationInfo... );
+
+		// Right-hand side = input constraints minus the prolongation constraints
+#pragma omp parallel for
+		for( int i=levelBegin ; i<levelEnd ; i++ )
+		{
+			const int row = i - levelBegin;
+			adjusted[row] = constraints[ _sNodes.treeNodes[i]->nodeData.nodeIndex ] - adjusted[row];
+		}
+
+		// Apply the SOR weights to the diagonal of every non-empty row
+		for( int i=levelBegin ; i<levelEnd ; i++ )
+		{
+			const int row = i - levelBegin;
+			if( M.rowSize( row ) ) D[row] *= sorWeights[i];
+		}
+		stats.systemTime += Time() - systemStart;
+
+		// The multi-colored indices for the level
+		std::vector< std::vector< int > > mcIndices;
+		_setMultiColorIndices( UIntPack< FEMSigs ... >() , levelBegin , levelEnd , mcIndices );
+
+		ConstPointer( T ) B = adjusted;
+		Pointer( T ) X = GetPointer( &solution[0] + levelBegin , _sNodesSize( depth ) );
+
+		// Squared norms of the constraints and of the residual before relaxing
+		if( computeNorms )
+		{
+#pragma omp parallel for reduction( + : bNorm , inRNorm )
+			for( int j=0 ; j<M.rows() ; j++ )
+			{
+				T Mx = {};
+				ConstPointer( MatrixEntry< Real > ) e = M[j];
+				ConstPointer( MatrixEntry< Real > ) rowEnd = e + (unsigned long long)M.rowSize(j);
+				for( ; e!=rowEnd ; e++ ) Mx += X[ e->N ] * e->Value;
+				bNorm += Dot( B[j] , B[j] );
+				inRNorm += Dot( Mx - B[j] , Mx - B[j] );
+			}
+		}
+
+		// Relax
+		const double solveStart = Time();
+		MemoryUsage();
+		for( int pass=0 ; pass<iters ; pass++ ) M.gsIteration( mcIndices , ( ConstPointer( Real ) )D , B , X , coarseToFine , true );
+		FreePointer( D );
+		stats.solveTime += Time() - solveStart;
+
+		// Squared norm of the residual after relaxing
+		if( computeNorms )
+		{
+#pragma omp parallel for reduction( + : outRNorm )
+			for( int j=0 ; j<M.rows() ; j++ )
+			{
+				T Mx = {};
+				ConstPointer( MatrixEntry< Real > ) e = M[j];
+				ConstPointer( MatrixEntry< Real > ) rowEnd = e + (unsigned long long)M.rowSize(j);
+				for( ; e!=rowEnd ; e++ ) Mx += X[ e->N ] * e->Value;
+				outRNorm += Dot( Mx-B[j] , Mx-B[j] );
+			}
+		}
+		FreePointer( adjusted );
+	}
+	if( computeNorms )
+	{
+		stats.bNorm2 = bNorm;
+		stats.inRNorm2 = inRNorm;
+		stats.outRNorm2 = outRNorm;
+	}
+	MemoryUsage();
+
+	return iters;
+}
+// Runs `iters` Gauss-Seidel passes at level `depth`, processing the nodes one block of slices at a time
+// (SliceBlockSize consecutive slices form a block) so that only a window of per-block matrices, diagonals and
+// constraints is allocated at once.
+// If sliceBlockSize is zero the work is delegated to _solveFullSystemGS.
+// Timings go to stats.systemTime / stats.solveTime; if computeNorms is set the squared constraint norm and the
+// squared residual norms before / after relaxation go to stats.bNorm2 / stats.inRNorm2 / stats.outRNorm2.
+// Returns iters.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , typename SORWeights , unsigned int ... PointDs >
+int FEMTree< Dim , Real >::_solveSlicedSystemGS( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , unsigned int sliceBlockSize , SORWeights sorWeights , _SolverStats& stats , bool computeNorms , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	// sliceBlockSize is unsigned, so this test only fires for zero
+	if( sliceBlockSize<=0 ) return _solveFullSystemGS( UIntPack< FEMSigs ... >() , F , bsData , depth , solution , prolongedSolution , constraints , Dot , iters , coarseToFine , sorWeights , stats , computeNorms , interpolationInfo ... );
+	CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > > ccStencil;
+	PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > > pcStencils;
+	F.template setStencil< false >( ccStencil );
+	F.template setStencils< true >( pcStencils );
+	{
+		// Assuming Degree=2 and we are solving forward using two iterations, the pattern of relaxations should look like:
+		//      +--+--+--+--+--+
+		//      *  |  |  |  |  |
+		//     o|  |  |  |  |  |
+		//    o |  |  |  |  |  |
+		//   o  |  |  |  |  |  |
+		//  o   |  |  |  |  |  |
+		// o    |  |  |  |  |  |
+		//      |  *  |  |  |  |
+		//      | *|  |  |  |  |
+		//      |* |  |  |  |  |
+		//      *  |  |  |  |  |
+		//     o|  |  |  |  |  |
+		//    o |  |  |  |  |  |
+		//      |  |  *  |  |  |
+		//      |  | *|  |  |  |
+		//      |  |* |  |  |  |
+		//      |  *  |  |  |  |
+		//      | *|  |  |  |  |
+		//      |* |  |  |  |  |
+		//      |  |  |  *  |  |
+		//      |  |  | *|  |  |
+		//      |  |  |* |  |  |
+		//      |  |  *  |  |  |
+		//      |  | *|  |  |  |
+		//      |  |* |  |  |  |
+		//      |  |  |  |  *  |
+		//      |  |  |  | *|  |
+		//      |  |  |  |* |  |
+		//      |  |  |  *  |  |
+		//      |  |  | *|  |  |
+		//      |  |  |* |  |  |
+		//      |  |  *  |  |  |
+		//      |  |  |  |  |  *
+		//      |  |  |  |  | *|
+		//      |  |  |  |  |* |
+		//      |  |  |  |  *  |
+		//      |  |  |  | *|  |
+		//      |  |  |  |* |  |
+		//      |  |  |  |  |  |  o
+		//      |  |  |  |  |  | o
+		//      |  |  |  |  |  |o
+		//      |  |  |  |  |  *
+		//      |  |  |  |  | *|
+		//      |  |  |  |  |* |
+
+		const int SliceBlockSize = (int)sliceBlockSize;
+		// OverlapRadius = Degree
+		const int OverlapRadii[] = { ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... };
+		// The overlap radius along the last dimension, rounded up to a whole number of blocks
+		const int OverlapBlockRadius = ( OverlapRadii[Dim-1] + SliceBlockSize - 1 ) / SliceBlockSize;
+		static const int LastFEMSig = UIntPack< FEMSigs ... >::template Get< Dim-1 >();
+		// The range of slice indices, taken from the B-spline range of the last dimension's signature
+		int _sliceBegin = _BSplineBegin< LastFEMSig >( depth ) , _sliceEnd = _BSplineEnd< LastFEMSig >( depth );
+
+		// The range of block indices [blockBegin,blockEnd) derived from the slice range
+		int blockBegin = ( _sliceBegin - ( SliceBlockSize - 1 ) ) / SliceBlockSize , blockEnd = ( _sliceEnd + ( SliceBlockSize - 1 ) ) / SliceBlockSize;
+		// First / last slice of block b, clamped to [_sliceBegin,_sliceEnd-1]
+		std::function< int ( int ) > BlockFirst = [&]( int b ){ return std::max< int >( b * SliceBlockSize , _sliceBegin ); };
+		std::function< int ( int ) > BlockLast  = [&]( int b ){ return std::min< int >( b * SliceBlockSize + SliceBlockSize - 1 , _sliceEnd - 1 ); };
+
+		// View of B restricted to the nodes of block b
+		// NOTE(review): BBlock is not referenced in the remainder of this function
+		auto BBlock = [&]( int d , int b , ConstPointer( T ) B )
+		{
+			return GetPointer( &B[0] + _sNodesBegin( d , BlockFirst( b ) ) , _sNodesEnd( d , BlockLast( b ) ) - _sNodesBegin( d , BlockFirst( b ) ) );
+		};
+		// View of X based at the first node of block b; the two extra arguments are the offsets (relative to that node)
+		// of the first node of block b-OverlapBlockRadius and of the end of block b+OverlapBlockRadius
+		// (presumably the accessible index bounds of the returned pointer -- confirm against GetPointer)
+		auto XBlocks = [&]( int d , int b , Pointer( T ) X )
+		{
+			return GetPointer( &X[0] + _sNodesBegin( d , BlockFirst( b ) ) , _sNodesBegin( d , BlockFirst( b - OverlapBlockRadius ) ) - _sNodesBegin( d , BlockFirst( b ) ) , _sNodesEnd( d , BlockLast( b + OverlapBlockRadius ) ) - _sNodesBegin( d , BlockFirst( b ) ) );
+		};
+
+		double& systemTime = stats.systemTime;
+		double&  solveTime = stats. solveTime;
+		systemTime = solveTime = 0.;
+
+		// A half-open range [_begin,_end) of block indices.
+		// The constructor also accepts a reversed pair (begin>end), in which case the range covers end+1 .. begin.
+		// begin(forward) / end(forward) give the first index and the one-past-last index when walking in the given direction.
+		struct BlockWindow
+		{
+		protected:
+			int _begin , _end;
+		public:
+			BlockWindow( int begin , int end )
+			{
+				if( begin<=end ) _begin = begin , _end = end;
+				else             _begin = end+1 , _end = begin+1;
+			}
+			int size( void ) const { return _end-_begin; }
+			BlockWindow& operator += ( int off ){ _begin += off , _end += off ; return *this; }
+			BlockWindow& operator -= ( int off ){ _begin -= off , _end -= off ; return *this; }
+			BlockWindow& operator++ ( void ){ _begin++ , _end++ ; return *this; }
+			BlockWindow& operator-- ( void ){ _begin-- , _end-- ; return *this; }
+			int begin( bool forward ) const { return forward ? _begin : _end-1; }
+			int end  ( bool forward ) const { return forward ? _end : _begin-1; }
+			bool inBlock( int b ) const { return b>=_begin && b<_end; }
+		};
+		double bNorm=0 , inRNorm=0 , outRNorm=0;
+		// The sweep runs over increasing block indices unless coarseToFine is set
+		bool forward = !coarseToFine;
+		// When norms are requested the residual window extends OverlapBlockRadius blocks beyond the solve window on each side
+		int residualOffset = computeNorms ? OverlapBlockRadius : 0;
+		// Set the number of in-memory blocks required for a temporally blocked solver
+		const int ColorModulus = OverlapBlockRadius;
+		// The number of in-core blocks over which we relax
+		// [WARNING] If the block size is larger than one, we may be able to use fewer blocks
+		// NOTE(review): for iters==0 this evaluates to 0, and MOD( b , solveBlocks ) in the relaxation loop below would then
+		//               have a zero modulus if that loop is entered -- confirm callers never reach here with iters==0
+		int solveBlocks = std::max< int >( 0 , std::min< int >( ColorModulus*iters - ( ColorModulus-1 ) , blockEnd-blockBegin ) );
+		// The number of in-core blocks over which we either solve or compute residuals
+		int matrixBlocks = std::max< int >( 1 , std::min< int >( solveBlocks+2*residualOffset , blockEnd-blockBegin ) );
+		// The list of matrices for each in-memory block
+		Pointer( SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > ) _M = NewPointer< SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > >( matrixBlocks );
+		// The per-block diagonals and adjusted constraints (slot for block b is MOD( b , matrixBlocks ))
+		Pointer( Pointer( Real ) ) _D = AllocPointer< Pointer( Real ) >( matrixBlocks );
+		std::vector< Pointer( T ) > _constraints( matrixBlocks );
+		for( int i=0 ; i<matrixBlocks ; i++ ) _D[i] = NullPointer( Real ) , _constraints[i] = NullPointer( T );
+		// The list of multi-colored indices  for each in-memory block
+		Pointer( std::vector< std::vector< int > > ) mcIndices = NewPointer< std::vector< std::vector< int > > >( solveBlocks );
+		// NOTE(review): `start` and `end` declared here are not referenced below (the inner loops declare their own)
+		int dir = forward ? 1 : -1 , start = forward ? blockBegin : blockEnd-1 , end = forward ? blockEnd : blockBegin-1;
+		const BlockWindow FullWindow( blockBegin , blockEnd );
+		// Both windows are anchored at the first block of the sweep and extend against the sweep direction;
+		// they are advanced by `dir` on every iteration of the main loop
+		BlockWindow residualWindow( FullWindow.begin(forward) , FullWindow.begin(forward) - ( ColorModulus*iters - ( ColorModulus-1 ) ) * dir - 2*residualOffset*dir );
+		BlockWindow solveWindow( FullWindow.begin(forward) - residualOffset*dir , FullWindow.begin(forward) - residualOffset*dir - ( ColorModulus*iters - ( ColorModulus-1 ) ) * dir );
+		// If we are solving forward we start in a block S with S mod ColorModulus = ColorModulus-1
+		// and end in a block E with E mod ColorModulus = 0
+		while( MOD( solveWindow.begin(!forward) , ColorModulus )!=( forward ? ColorModulus-1 : 0 ) ) solveWindow -= dir , residualWindow -= dir;
+		// Dry run over the same sequence of leading blocks to find the largest node count of any block,
+		// so that every per-block buffer can be allocated once at that size
+		int maxBlockSize = 0;
+		BlockWindow _residualWindow = residualWindow;
+		for( ; _residualWindow.end(!forward)*dir<FullWindow.end(forward)*dir ; _residualWindow += dir )
+		{
+			int b = _residualWindow.begin(!forward);
+			if( FullWindow.inBlock( b ) ) maxBlockSize = std::max< int >( maxBlockSize , _sNodesEnd( depth , BlockLast( b ) ) - _sNodesBegin( depth , BlockFirst( b ) ) );
+		}
+		for( int i=0 ; i<matrixBlocks ; i++ ) _constraints[i] = AllocPointer< T >( maxBlockSize ) , _D[i] = AllocPointer< Real >( maxBlockSize );
+		// Main sweep: slide both windows one block at a time until the residual window has passed the last block
+		for( ; residualWindow.end(!forward)*dir<FullWindow.end(forward)*dir ; residualWindow += dir , solveWindow += dir )
+		{
+			double t;
+			{
+				// The blocks at the leading edge (in the sweep direction) of the two windows
+				int frontSolveBlock =    solveWindow.begin(!forward);
+				int residualBlock   = residualWindow.begin(!forward);
+				// Get the leading matrix and compute the constraint norm / initial residual
+				// [WARNING] This is likely wrong. We probably have to pull this into its own for "for( int _c=0 ; _c<ColorModulus ; _c++ )" loop
+				//          to ensure that adjacent read-only blocks have not been updated yet.
+				if( FullWindow.inBlock( residualBlock ) )
+				{
+					int b = residualBlock , _b = MOD( b , matrixBlocks );
+
+					t = Time();
+					_getSliceMatrixAndProlongationConstraints( UIntPack< FEMSigs ... >() , F , _M[_b] , _D[_b] , bsData , depth , _sNodesBegin( depth , BlockFirst( b ) ) , _sNodesEnd( depth , BlockLast( b ) ) , prolongedSolution , _constraints[_b] , ccStencil , pcStencils , interpolationInfo... );
+					// Right-hand side of the block = input constraints minus the prolongation constraints
+#pragma omp parallel for
+					for( int i=_sNodesBegin( depth , BlockFirst( b ) ) ; i<_sNodesEnd( depth , BlockLast( b ) ) ; i++ ) _constraints[_b][ i - _sNodesBegin( depth , BlockFirst( b ) ) ] = constraints[i] - _constraints[_b][ i - _sNodesBegin( depth , BlockFirst( b ) ) ];
+					// Apply the SOR weights to the diagonal of every non-empty row
+					{
+						int begin = _sNodesBegin( depth , BlockFirst( b ) ) , end = _sNodesEnd( depth , BlockLast( b ) );
+						for( int i=begin ; i<end ; i++ ) if( _M[_b].rowSize( i-begin ) ) _D[_b][i-begin] *= sorWeights[i];
+					}
+					systemTime += Time()-t;
+					// Accumulate the squared constraint norm and the squared residual norm before relaxation
+					if( computeNorms )
+					{
+						ConstPointer( T ) B = _constraints[_b];
+						ConstPointer( T ) X = XBlocks( depth , b , solution );
+#pragma omp parallel for reduction( + : bNorm , inRNorm )
+						for( int j=0 ; j<_M[_b].rows() ; j++ )
+						{
+							T temp = {};
+							ConstPointer( MatrixEntry< Real > ) start = _M[_b][j];
+							ConstPointer( MatrixEntry< Real > ) end = start + (unsigned long long)_M[_b].rowSize(j);
+							ConstPointer( MatrixEntry< Real > ) e;
+							for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
+							bNorm += Dot( B[j] , B[j] );
+							inRNorm += Dot( temp - B[j] , temp - B[j] );
+						}
+					}
+				}
+				t = Time();
+				// Get the leading multi-color indices
+				if( iters && FullWindow.inBlock( frontSolveBlock ) )
+				{
+					int b = frontSolveBlock , _b = MOD( b , matrixBlocks ) , __b = MOD( b , solveBlocks );
+					for( int i=0 ; i<int( mcIndices[__b].size() ) ; i++ ) mcIndices[__b][i].clear();
+					_setMultiColorIndices( UIntPack< FEMSigs ... >() , _sNodesBegin( depth , BlockFirst( b ) ) , _sNodesEnd( depth , BlockLast( b ) ) , mcIndices[__b] );
+				}
+			}
+
+			// Relax the system
+			// (every ColorModulus-th block of the solve window, starting at its leading block and stepping against the sweep direction)
+			for( int block=solveWindow.begin(!forward) ; solveWindow.inBlock(block) ; block-=dir*ColorModulus ) if( FullWindow.inBlock( block ) )
+			{
+				int b = block , _b = MOD( b , matrixBlocks ) , __b = MOD( b , solveBlocks );
+				ConstPointer( T ) B = _constraints[_b];
+				Pointer( T ) X = XBlocks( depth , b , solution );
+				_M[_b].gsIteration( mcIndices[__b] , ( ConstPointer( Real ) )_D[_b] , B , X , coarseToFine , true );
+			}
+			solveTime += Time() - t;
+
+			// Compute the final residual
+			// (for the block at the trailing edge of the residual window)
+			{
+				int residualBlock = residualWindow.begin(forward);
+				if( computeNorms && FullWindow.inBlock( residualBlock ) )
+				{
+					int b = residualBlock , _b = MOD( b , matrixBlocks );
+					ConstPointer( T ) B = _constraints[_b];
+					ConstPointer( T ) X = XBlocks( depth , b , solution );
+#pragma omp parallel for reduction( + : outRNorm )
+					for( int j=0 ; j<_M[_b].rows() ; j++ )
+					{
+						T temp = {};
+						ConstPointer( MatrixEntry< Real > ) start = _M[_b][j];
+						ConstPointer( MatrixEntry< Real > ) end = start + (unsigned long long)_M[_b].rowSize(j);
+						ConstPointer( MatrixEntry< Real > ) e;
+						for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
+						outRNorm += Dot( temp-B[j] , temp-B[j] );
+					}
+				}
+			}
+		}
+		// Release the per-block buffers, then the containers that held them
+		for( int i=0 ; i<matrixBlocks ; i++ ) FreePointer( _D[i] );
+		for( int i=0 ; i<matrixBlocks ; i++ ) FreePointer( _constraints[i] );
+
+		if( computeNorms ) stats.bNorm2 = bNorm , stats.inRNorm2 = inRNorm , stats.outRNorm2 = outRNorm;
+		DeletePointer( _M );
+		DeletePointer( mcIndices );
+		FreePointer( _D );
+	}
+	MemoryUsage();
+	return iters;
+}
+#undef MOD
+
+// Solves the system of level `depth` with conjugate gradients, using at most min( iters , #non-empty rows ) iterations.
+// The constraints are first corrected by the prolongation constraints produced while building the matrix.
+// When every row is non-empty, no interpolation constraint pins the DC term, all boundary types have a partition of
+// unity and F vanishes on constants, the operator handed to CG additionally adds the average of its input to every output entry.
+// Timings go to stats.systemTime / stats.solveTime; if computeNorms is set the squared constraint norm and the
+// squared residual norms before / after the solve go to stats.bNorm2 / stats.inRNorm2 / stats.outRNorm2.
+// Returns the value returned by SolveCG (0 if no iteration was requested).
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+int FEMTree< Dim , Real >::_solveSystemCG( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms , double accuracy , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > Degrees;
+	typedef SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > SystemMatrix;
+
+	int iter = 0;
+	const int levelBegin = _sNodesBegin(depth) , levelEnd = _sNodesEnd(depth);
+	Pointer( T ) X = GetPointer( &solution[0] + levelBegin , _sNodesSize(depth) );
+	// B first views the caller's constraints; it is re-pointed to the adjusted copy below
+	ConstPointer( T ) B = GetPointer( &constraints[0] + levelBegin , _sNodesSize(depth) );
+	SystemMatrix M;
+
+	stats.systemTime = stats.solveTime = 0.;
+
+	// Get the system matrix (and adjust the right-hand-side based on the coarser solution if prolonging)
+	const double systemStart = Time();
+	Pointer( T ) adjusted = AllocPointer< T >( _sNodesSize( depth ) );
+	B = adjusted;
+	CCStencil< Degrees > ccStencil;
+	PCStencils< Degrees > pcStencils;
+	F.template setStencil< false >( ccStencil );
+	F.template setStencils< true >( pcStencils );
+	_getSliceMatrixAndProlongationConstraints( UIntPack< FEMSigs ... >() , F , M , NullPointer( Real ) , bsData , depth , levelBegin , levelEnd , prolongedSolution , adjusted , ccStencil , pcStencils , interpolationInfo... );
+#pragma omp parallel for
+	for( int i=levelBegin ; i<levelEnd ; i++ )
+	{
+		const int row = i - levelBegin;
+		adjusted[row] = constraints[i] - adjusted[row];
+	}
+	stats.systemTime = Time() - systemStart;
+
+	// Solve the linear system
+	const double solveStart = Time();
+	accuracy = Real( accuracy / 100000 ) * M.rows();
+
+	// Decide whether the DC term has to be added to the operator
+	int extents[] = { ( _BSplineEnd< FEMSigs >( depth ) - _BSplineBegin< FEMSigs >( depth ) ) ... };
+	int nonZeroRows = 0;
+	for( int i=0 ; i<M.rows() ; i++ )
+	{
+		if( M.rowSize(i) ) nonZeroRows++;
+	}
+	int totalDim = 1;
+	for( int d=0 ; d<Dim ; d++ ) totalDim *= extents[d];
+	BoundaryType bTypes[] = { FEMSignature< FEMSigs >::BType ... };
+	bool hasPartitionOfUnity = true;
+	for( int d=0 ; d<Dim ; d++ ) hasPartitionOfUnity &= HasPartitionOfUnity( bTypes[d] );
+	bool addDCTerm = ( nonZeroRows==totalDim && !ConstrainsDCTerm( interpolationInfo... ) && hasPartitionOfUnity && F.vanishesOnConstants() );
+
+	// Squared norms of the constraints and of the residual before solving
+	double bNorm = 0 , inRNorm = 0 , outRNorm = 0;
+	if( computeNorms )
+	{
+#pragma omp parallel for reduction( + : bNorm , inRNorm )
+		for( int j=0 ; j<M.rows() ; j++ )
+		{
+			T Mx = {};
+			ConstPointer( MatrixEntry< Real > ) e = M[j];
+			ConstPointer( MatrixEntry< Real > ) rowEnd = e + (unsigned long long)M.rowSize(j);
+			for( ; e!=rowEnd ; e++ ) Mx += X[ e->N ] * e->Value;
+			bNorm += Dot( B[j] , B[j] );
+			inRNorm += Dot( Mx-B[j] , Mx-B[j] );
+		}
+	}
+
+	iters = std::min< int >( nonZeroRows , iters );
+
+	// The operator applied by CG: out = M * in (+ average( in ) on every entry if the DC term is requested)
+	struct SPDFunctor
+	{
+	protected:
+		const SystemMatrix& _M;
+		bool _addDCTerm;
+	public:
+		SPDFunctor( const SystemMatrix& M , bool addDCTerm ) : _M(M) , _addDCTerm(addDCTerm){ }
+		void operator()( ConstPointer( T ) in , Pointer( T ) out ) const
+		{
+			_M.multiply( in , out );
+			if( !_addDCTerm ) return;
+			T average = {};
+			for( int i=0 ; i<_M.rows() ; i++ ) average += in[i];
+			average /= _M.rows();
+			for( int i=0 ; i<_M.rows() ; i++ ) out[i] += average;
+		}
+	};
+	if( iters ) iter = SolveCG< SPDFunctor , T , Real >( SPDFunctor( M , addDCTerm ) , (int)M.rows() , ( ConstPointer( T ) )B , iters , X , Real( accuracy ) , Dot );
+
+	stats.solveTime = Time() - solveStart;
+
+	// Squared norm of the residual after solving
+	if( computeNorms )
+	{
+#pragma omp parallel for reduction( + : outRNorm )
+		for( int j=0 ; j<M.rows() ; j++ )
+		{
+			T Mx = {};
+			ConstPointer( MatrixEntry< Real > ) e = M[j];
+			ConstPointer( MatrixEntry< Real > ) rowEnd = e + (unsigned long long)M.rowSize(j);
+			for( ; e!=rowEnd ; e++ ) Mx += X[ e->N ] * e->Value;
+			outRNorm += Dot( Mx-B[j] , Mx-B[j] );
+		}
+		stats.bNorm2 = bNorm;
+		stats.inRNorm2 = inRNorm;
+		stats.outRNorm2 = outRNorm;
+	}
+	FreePointer( adjusted );
+
+	MemoryUsage();
+	return iter;
+}
+// Solves the system of level `depth` with `vCycles` multigrid V-cycles.
+// -- The finest matrix M[depth] comes from systemMatrix(); each coarser matrix is R * M * P, where R is the
+//    down-sampling matrix of the level and P its transpose.
+// -- On the way down each level is relaxed `iters` times, its residual is restricted to the next coarser level,
+//    and the coarser solution is reset to zero.
+// -- Level 0 is solved with conjugate gradients (accuracy cgAccuracy).
+// -- On the way up the coarser solution is prolonged and added to the finer one, which is then relaxed `iters` times.
+// `solution` and `constraints` are offset by nodesBegin( depth ) and updated / read in place.
+// Timings go to stats.systemTime / stats.solveTime; if computeNorms is set the squared constraint norm and the
+// squared residual norms before / after the cycles go to stats.bNorm2 / stats.inRNorm2 / stats.outRNorm2.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+void FEMTree< Dim , Real >::_solveRegularMG( UIntPack< FEMSigs ... > ,  typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) constraints , TDotT Dot , int vCycles , int iters , _SolverStats& stats , bool computeNorms , double cgAccuracy , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	double& systemTime = stats.systemTime;
+	double&  solveTime = stats. solveTime;
+
+	// Per-level prolongation (P), restriction (R) and system (M) matrices; P[d] / R[d] connect levels d and d+1
+	std::vector< SparseMatrix< Real , int > > P( depth ) , R( depth ) , M( depth+1 );
+	// Per-level diagonal (D), right-hand side (B), solution (X) and scratch for M*X (MX)
+	std::vector< Pointer( Real ) > D( depth+1 );
+	std::vector< Pointer( T ) > B( depth+1 ) , X( depth+1 ) , MX( depth+1 );
+	std::vector< std::vector< std::vector< int > > > multiColorIndices( depth+1 );
+
+	systemTime = Time();
+	M.back() = systemMatrix< Real >( UIntPack< FEMSigs ... >() , F , depth , interpolationInfo ... );
+	for( int d=depth ; d>0 ; d-- )
+	{
+		R[d-1] = downSampleMatrix( UIntPack< FEMSigs ... >() , d );
+		P[d-1] = R[d-1].transpose();
+		M[d-1] = R[d-1] * M[d] * P[d-1];
+	}
+	for( int d=0 ; d<=depth ; d++ )
+	{
+		size_t dim = M[d].rows();
+		D[d]  = AllocPointer< Real >( dim );
+		MX[d] = AllocPointer< T >( dim );
+		M[d].setDiagonalR( D[d] );
+		setMultiColorIndices( UIntPack< FEMSigs ... >() , d , multiColorIndices[d] );
+		// The finest level uses the caller's buffers, so only the coarser levels own X and B
+		if( d<depth )
+		{
+			X[d]  = AllocPointer< T >( dim );
+			B[d]  = AllocPointer< T >( dim );
+		}
+	}
+	X.back() = solution + nodesBegin( depth );
+	ConstPointer( T ) _B = constraints + nodesBegin( depth );
+	systemTime = Time() - systemTime;
+
+	solveTime = Time();
+
+	// Squared norms of the constraints and of the residual before the first cycle
+	double bNorm = 0 , inRNorm = 0 , outRNorm = 0;
+	if( computeNorms )
+	{
+		const SparseMatrix< Real , int >& _M = M.back();
+		ConstPointer( T ) _X = X.back();
+#pragma omp parallel for reduction( + : bNorm , inRNorm )
+		for( int j=0 ; j<_M.rows() ; j++ )
+		{
+			T temp = {};
+			ConstPointer( MatrixEntry< Real > ) start = _M[j];
+			ConstPointer( MatrixEntry< Real > ) end = start + (unsigned long long)_M.rowSize(j);
+			ConstPointer( MatrixEntry< Real > ) e;
+			for( e=start ; e!=end ; e++ ) temp += _X[ e->N ] * e->Value;
+			bNorm += Dot( _B[j] , _B[j] );
+			inRNorm += Dot( temp-_B[j] , temp-_B[j] );
+		}
+	}
+
+	for( int v=0 ; v<vCycles ; v++ )
+	{
+		// Restriction
+		for( int d=depth ; d>0 ; d-- )
+		{
+			ConstPointer( T ) __B = d==depth ? _B : B[d];
+			for( int i=0 ; i<iters ; i++ ) M[d].gsIteration( multiColorIndices[d] , D[d] , __B , X[d] , true , true );
+			// Residual of level d, restricted to become the right-hand side of level d-1
+			M[d].multiply( X[d] , MX[d] );
+			for( int i=0 ; i<M[d].rows() ; i++ ) MX[d][i] = __B[i] - MX[d][i];
+			R[d-1].multiply( MX[d] , B[d-1] );
+			// The coarser level solves for a correction, so it starts from zero
+			memset( X[d-1] , 0 , sizeof( T )*M[d-1].rows() );
+		}
+
+		// Base
+		{
+			int d = 0;
+			ConstPointer( T ) __B = d==depth ? _B : B[d];
+			// The operator applied by CG: out = M * in (+ average( in ) on every entry if the DC term is requested)
+			struct SPDFunctor
+			{
+			protected:
+				const SparseMatrix< Real , int >& _M;
+				bool _addDCTerm;
+			public:
+				SPDFunctor( const SparseMatrix< Real , int  >& M , bool addDCTerm ) : _M(M) , _addDCTerm(addDCTerm){ }
+				void operator()( ConstPointer( T ) in , Pointer( T ) out ) const
+				{
+					_M.multiply( in , out );
+					if( _addDCTerm )
+					{
+						T average = {};
+						for( int i=0 ; i<_M.rows() ; i++ ) average += in[i];
+						average /= _M.rows();
+						for( int i=0 ; i<_M.rows() ; i++ ) out[i] += average;
+					}
+				}
+			};
+			int nonZeroRows = 0;
+			for( int i=0 ; i<M[d].rows() ; i++ ) if( M[d].rowSize(i) ) nonZeroRows++;
+			int totalDim = 1;
+			// The row count above is taken from M[d], so the grid extents it is compared against
+			// are measured at the same level d (not at the finest level)
+			int dims[] = { ( _BSplineEnd< FEMSigs >( d ) - _BSplineBegin< FEMSigs >( d ) ) ... };
+			for( int dd=0 ; dd<Dim ; dd++ ) totalDim *= dims[dd];
+			BoundaryType bTypes[] = { FEMSignature< FEMSigs >::BType ... };
+			bool hasPartitionOfUnity = true;
+			for( int dd=0 ; dd<Dim ; dd++ ) hasPartitionOfUnity &= HasPartitionOfUnity( bTypes[dd] );
+			bool addDCTerm = ( nonZeroRows==totalDim && !ConstrainsDCTerm( interpolationInfo... ) && hasPartitionOfUnity && F.vanishesOnConstants() );
+
+			SolveCG< SPDFunctor , T , Real >( SPDFunctor( M[d] , addDCTerm ) , (int)M[d].rows() , ( ConstPointer( T ) )__B , nonZeroRows , X[d] , Real( cgAccuracy ) , Dot );
+		}
+
+		// Prolongation
+		for( int d=1 ; d<=depth ; d++ )
+		{
+			ConstPointer( T ) __B = d==depth ? _B : B[d];
+			// Add the prolonged coarser correction to the current level, then relax
+			P[d-1].multiply( X[d-1] , X[d] , MULTIPLY_ADD );
+			for( int i=0 ; i<iters ; i++ ) M[d].gsIteration( multiColorIndices[d] , D[d] , __B , X[d] , false , true );
+		}
+	}
+	// Squared norm of the residual after the last cycle
+	if( computeNorms )
+	{
+		const SparseMatrix< Real , int >& _M = M.back();
+		ConstPointer( T ) _X = X.back();
+#pragma omp parallel for reduction( + : outRNorm )
+		for( int j=0 ; j<_M.rows() ; j++ )
+		{
+			T temp = {};
+			ConstPointer( MatrixEntry< Real > ) start = _M[j];
+			ConstPointer( MatrixEntry< Real > ) end = start + (unsigned long long)_M.rowSize(j);
+			ConstPointer( MatrixEntry< Real > ) e;
+			for( e=start ; e!=end ; e++ ) temp += _X[ e->N ] * e->Value;
+			outRNorm += Dot( temp-_B[j] , temp-_B[j] );
+		}
+		stats.bNorm2 = bNorm , stats.inRNorm2 = inRNorm , stats.outRNorm2 = outRNorm;
+	}
+	solveTime = Time() - solveTime;
+
+	MemoryUsage();
+
+	// Release the per-level buffers (X / B of the finest level belong to the caller)
+	for( int d=0 ; d<=depth ; d++ )
+	{
+		FreePointer( D[d] );
+		FreePointer( MX[d] );
+		if( d<depth )
+		{
+			FreePointer( X[d] );
+			FreePointer( B[d] );
+		}
+	}
+}
+
+// Returns the number of entries of the overlap window `neighbors` that pass _isValidFEM1Node.
+// (GCC versions before 5 get an overload with an explicit UIntPack tag argument.)
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+int FEMTree< Dim , Real >::_getMatrixRowSize( UIntPack< FEMSigs ... > , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ) const
+#else // !__GNUC__ || __GNUC__ >=5
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+int FEMTree< Dim , Real >::_getMatrixRowSize( const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ) const
+#endif // __GNUC__ && __GNUC__ < 5
+{
+	typedef UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > OverlapSizes;
+
+	// Walk the flattened window from its first to its last entry
+	const FEMTreeNode* const * cursor = neighbors.neighbors.data;
+	const FEMTreeNode* const * const windowEnd = cursor + WindowSize< OverlapSizes >::Size;
+	int validCount = 0;
+	for( ; cursor!=windowEnd ; cursor++ ) if( _isValidFEM1Node( *cursor ) ) validCount++;
+	return validCount;
+}
+
+// Returns the number of entries of the parent's overlap window `pNeighbors`, restricted to the index list
+// associated with `node`'s child position, that pass _isValidFEM1Node.
+// Returns 0 if `node` has no parent.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+int FEMTree< Dim , Real >::_getProlongedMatrixRowSize( const FEMTreeNode* node , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > OverlapSizes;
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] This change needs to be validated" )
+#endif // SHOW_WARNINGS
+	// Per-child index lists into the window, with bounds supplied by _SetParentOverlapBounds (constructed once)
+	static const WindowLoopData< OverlapSizes > loopData( []( int c , int* start , int*end ){ _SetParentOverlapBounds( FEMDegrees() , FEMDegrees() , c , start , end );} );
+
+	if( !node->parent ) return 0;
+
+	// The position of the node among its parent's children selects the index list
+	const int childIndex = (int)( node - node->parent->children );
+	const unsigned int entryCount = loopData.size[childIndex];
+	const unsigned int* entries = loopData.indices[childIndex];
+	ConstPointer( FEMTreeNode * const ) pNodes = pNeighbors.neighbors().data;
+
+	int rowSize = 0;
+	for( unsigned int k=0 ; k<entryCount ; k++ )
+	{
+		if( _isValidFEM1Node( pNodes[ entries[k] ] ) ) rowSize++;
+	}
+	return rowSize;
+}
+
+
+// Given a node (the entry of the `neighbors` window selected by WindowIndex< OverlapSizes , OverlapRadii >):
+// -- For each of its neighbors:
+// ---- Compute the weighted sum of the product of the evaluations of the associated basis functions over the points
+// The sums are accumulated (+=) into `pointValues`, which is traversed in lock-step with the `neighbors` window.
+// Calls ERROR_OUT if the FEM degrees are not all >= PointD; returns without doing anything if interpolationInfo is null.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+void FEMTree< Dim , Real >::_addPointValues( UIntPack< FEMSigs ... > , StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* interpolationInfo ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > OverlapSizes;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack< ( -BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ... > LeftSupportRadii;
+	typedef UIntPack<    BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd     ... > RightSupportRadii;
+	// The point support radii mirror the function support radii: left uses SupportEnd, right uses -SupportStart
+	typedef UIntPack<    BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd     ... > LeftPointSupportRadii;
+	typedef UIntPack< ( -BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ... > RightPointSupportRadii;
+	typedef UIntPack<    BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize    ... > SupportSizes;
+
+	if( !( FEMDegrees() >= IsotropicUIntPack< Dim , PointD >() ) ) ERROR_OUT( "Insufficient derivatives" );
+	if( !interpolationInfo ) return;
+	const InterpolationInfo< T , PointD >& iInfo = *interpolationInfo;
+
+
+	// The node whose row is being assembled, with its depth and offset
+	const FEMTreeNode* node = neighbors.neighbors.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( node , d , off );
+
+	PointEvaluatorState< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , PointD > > peState;
+
+	// idx is written by the outer WindowLoop's update callback before outerFunction is invoked
+	int idx[Dim];	// The coordinates of the node containing the point _relative_ to the center node
+	// _idx is written by the inner WindowLoop's update callback (first Dim-1 dimensions only)
+	int _idx[Dim==1 ? 1 : Dim-1];
+	CumulativeDerivativeValues< double , Dim , PointD > dualValues;
+
+	// Processes all points stored with one neighbor node (skipped unless the node passes _isValidSpaceNode)
+	auto outerFunction = [&]( const FEMTreeNode* _node  )
+	{
+		if( _isValidSpaceNode( _node ) )
+		{
+			LocalOffset pOff;	// The coordinates of the node containing the point
+			// (this loop variable shadows the depth `d` only within the loop statement)
+			for( int d=0 ; d<Dim ; d++ ) pOff[d] = off[d] + idx[d];
+			// The range of point indices associated with _node
+			size_t begin , end;
+			iInfo.range( _node , begin , end );
+			for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+			{
+				const DualPointInfo< Dim , Real , T , PointD >& pData = iInfo[ pIndex ];
+				CumulativeDerivativeValues< double , Dim , PointD > values;
+				{
+					// NOTE(review): `weight` is not referenced again; pData.weight is used directly below
+					Real weight = pData.weight;
+					Point< Real , Dim > p = pData.position;
+					// Compute the partial evaluation of all B-splines (and derivatives) that are supported on the point
+					bsData.initEvaluationState( p , d , pOff , peState );
+
+					// The value (and derivatives) of the function of the center node at this point
+					values = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( off );
+				}
+				// Map the center node's values through the interpolation functor and scale by the point's weight
+				dualValues = iInfo( pIndex , values ) * pData.weight;
+				// (these arrays shadow the size_t `end` above inside the loop body only)
+				int start[Dim==1 ? 1 : Dim-1] , end[Dim==1 ? 1 : Dim-1];
+				// Compute the bounds of nodes which can be supported on the point
+				for( int d=0 ; d<Dim-1 ; d++ ) start[d] = idx[d] + (int)OverlapRadii::Values[d] - (int)LeftPointSupportRadii::Values[d] , end[d] = idx[d] + (int)OverlapRadii::Values[d] + (int)RightPointSupportRadii::Values[d] + 1;
+				// Iterate over the first Dim-1 dimensions; the callback receives the 1D slices along the last dimension
+				WindowLoop< Dim , Dim-1 >::Run
+				(
+					start , end , 
+					[&]( int d , int i ){ _idx[d] = i - (int)OverlapRadii::Values[d] + off[d]; } ,
+					[&]( const WindowSlice< Real , UIntPack< OverlapSizes::template Get< Dim-1 >() > > pointValues , ConstWindowSlice< const FEMTreeNode* , UIntPack< OverlapSizes::template Get< Dim-1 >() > > neighbors )
+					{
+						// The contribution of the first Dim-1 dimensions, one entry per derivative order 0..PointD
+						Point< double , PointD+1 > partialDot = peState.template partialDotDValues< Real , CumulativeDerivatives< Dim , PointD > >( dualValues , _idx );
+						// Output slice re-based at the entry of the node containing the point
+						Pointer( Real ) _pointValues = pointValues.data + idx[Dim-1] + OverlapRadii::Values[Dim-1];
+
+						// Index (along the last dimension) of the first neighbor that can be supported on the point
+						int _i = idx[Dim-1] + (int)OverlapRadii::Values[Dim-1] - (int)LeftPointSupportRadii::Values[Dim-1];
+						const double (*splineValues)[PointD+1] = peState.template values< Dim-1 >();
+						// Accumulate into every valid neighbor along the last dimension
+						for( unsigned int i=0 ; i<SupportSizes::Values[Dim-1] ; i++ ) if( _isValidFEM1Node( neighbors[ _i + i ] ) )
+							for( int d=0 ; d<=PointD ; d++ ) _pointValues[(int)i-(int)LeftPointSupportRadii::Values[Dim-1]] += (Real)( splineValues[i][d] * partialDot[d] );
+					} ,
+					pointValues() , neighbors.neighbors()
+				);
+			}
+		}
+	};
+	// Loop over all nodes which are supported on the center
+	WindowLoop< Dim >::Run
+	(
+		OverlapRadii() - LeftSupportRadii() , OverlapRadii() + RightSupportRadii() + IsotropicUIntPack< Dim , 1 >() ,
+		[&]( int d , int i ){ idx[d] = i - (int)OverlapRadii::Values[d]; } ,
+		outerFunction ,
+		neighbors.neighbors()
+	);
+}
+
+template< unsigned int Dim , class Real >	// Assembles row `idx` of M for the center node of `neighbors` (column indices are node indices minus `offset`) and sets that row's size to the number of entries written.
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >	// Returns the constraint computed from `prolongedSolution`; the value-initialized T is returned when the local depth is not positive or `prolongedSolution` is null.
+T FEMTree< Dim , Real >::_setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , size_t idx , SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > &M , int offset , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	T constraint ={};
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	// The node whose row is assembled is the center entry of the neighbor window.
+	int count = 0;	// number of entries written to the row so far
+	const FEMTreeNode* node = neighbors.neighbors.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+	Pointer( MatrixEntry< Real > ) row = M[idx];
+	// The prolongation constraint is only evaluated for a positive local depth and a non-null prolonged solution.
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( node , d , off );
+	if( d>0 && prolongedSolution )
+	{
+		int cIdx = (int)( node - node->parent->children );	// position of the node among its parent's children; selects the parent/child stencil
+		constraint = _getConstraintFromProlongedSolution( UIntPack< FEMSigs ... >() , F , neighbors , pNeighbors , node , prolongedSolution , pcStencils.data[cIdx] , bsData , interpolationInfo... );
+	}
+	// Interior nodes read their coefficients from ccStencil; all others call F.ccIntegrate per neighbor.
+	bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , UIntPack< FEMSignature< FEMSigs >::Degree ... >() , d , off );
+	// Zero the per-neighbor window and let _addPointValues accumulate into it.
+	StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues;
+	memset( pointValues.data , 0 , sizeof(Real)*WindowSize< OverlapSizes >::Size );
+	_addPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , bsData , interpolationInfo ... );
+	int nodeIndex = node->nodeData.nodeIndex;
+	if( isInterior ) // General case, so try to make fast
+	{
+		const FEMTreeNode* const * _nodes = neighbors.neighbors.data;
+		ConstPointer( double ) _stencil = ccStencil.data;
+		Real* _values = pointValues.data;
+		row[count++] = MatrixEntry< Real >( nodeIndex-offset , (Real)( _values[ WindowIndex< OverlapSizes , OverlapRadii >::Index ] + _stencil[ WindowIndex< OverlapSizes , OverlapRadii >::Index ] ) );	// the center (diagonal) entry is written first
+		for( int i=0 ; i<WindowSize< OverlapSizes >::Size ; i++ ) if( _isValidFEM1Node( _nodes[i] ) )
+		{
+			if( i!=WindowIndex< OverlapSizes , OverlapRadii >::Index ) row[count++] = MatrixEntry< Real >( _nodes[i]->nodeData.nodeIndex-offset , (Real)( _values[i] + _stencil[i] ) );	// skip the center, which was already written
+		}
+	}
+	else
+	{
+		LocalDepth d ; LocalOffset off;	// NOTE(review): shadows the outer d/off and is re-derived from the same node
+		_localDepthAndOffset( node , d , off );
+		Real temp = (Real)F.ccIntegrate( off , off ) + pointValues.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+
+		row[count++] = MatrixEntry< Real >( nodeIndex-offset , temp );	// the center (diagonal) entry is written first
+		LocalOffset _off;	// offset of the neighbor currently visited, maintained by the index-update lambda below
+		WindowLoop< Dim >::Run
+		(
+			ZeroUIntPack< Dim >() , OverlapSizes() ,
+			[&]( int d , int i ){ _off[d] = off[d] - (int)OverlapRadii::Values[d] + i; } ,
+			[&]( const FEMTreeNode* _node , Real pointValue )
+		{
+			if( node!=_node && FEMIntegrator::IsValidFEMNode( UIntPack< FEMSigs ... >() , d , _off ) )
+			{
+				Real temp = (Real)F.ccIntegrate( _off , off ) + pointValue;	// computed before the node validity test on the next line
+				if( _isValidFEM1Node( _node ) ) row[count++] = MatrixEntry< Real >( _node->nodeData.nodeIndex-offset , temp );
+			}
+		} ,
+			neighbors.neighbors() , pointValues()
+			);
+	}
+	M.setRowSize( idx , count );
+	return constraint;
+}
+
+template< unsigned int Dim , class Real >	// Writes the matrix entries for the center node of `neighbors` into the caller-supplied `row` (column indices are node indices minus `offset`).
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >	// Returns the constraint computed from `prolongedSolution`; the value-initialized T is returned when the local depth is not positive or `prolongedSolution` is null.
+T FEMTree< Dim , Real >::_setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , Pointer( MatrixEntry< Real > ) row , int offset , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	T constraint ={};
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	// NOTE(review): `count` tracks the entries written but is neither returned nor stored here -- confirm how callers learn the row length.
+	int count = 0;
+	const FEMTreeNode* node = neighbors.neighbors.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];	// center entry of the neighbor window
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( node , d , off );
+	if( d>0 && prolongedSolution )
+	{
+		int cIdx = (int)( node - node->parent->children );	// position of the node among its parent's children; selects the parent/child stencil
+		constraint = _getConstraintFromProlongedSolution( UIntPack< FEMSigs ... >() , F , neighbors , pNeighbors , node , prolongedSolution , pcStencils.data[cIdx] , bsData , interpolationInfo... );
+	}
+	// Interior nodes read their coefficients from ccStencil; all others call F.ccIntegrate per neighbor.
+	bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , UIntPack< FEMSignature< FEMSigs >::Degree ... >() , d , off );
+	// Zero the per-neighbor window and let _addPointValues accumulate into it.
+	StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues;
+	memset( pointValues.data , 0 , sizeof(Real)*WindowSize< OverlapSizes >::Size );
+	_addPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , bsData , interpolationInfo ... );
+	int nodeIndex = node->nodeData.nodeIndex;
+	if( isInterior ) // General case, so try to make fast
+	{
+		const FEMTreeNode* const * _nodes = neighbors.neighbors.data;
+		ConstPointer( double ) _stencil = ccStencil.data;
+		Real* _values = pointValues.data;
+		row[count++] = MatrixEntry< Real >( nodeIndex-offset , (Real)( _values[ WindowIndex< OverlapSizes , OverlapRadii >::Index ] + _stencil[ WindowIndex< OverlapSizes , OverlapRadii >::Index ] ) );	// the center (diagonal) entry is written first
+		for( int i=0 ; i<WindowSize< OverlapSizes >::Size ; i++ ) if( _isValidFEM1Node( _nodes[i] ) )
+		{
+			if( i!=WindowIndex< OverlapSizes , OverlapRadii >::Index ) row[count++] = MatrixEntry< Real >( _nodes[i]->nodeData.nodeIndex-offset , (Real)( _values[i] + _stencil[i] ) );	// skip the center, which was already written
+		}
+	}
+	else
+	{
+		LocalDepth d ; LocalOffset off;	// NOTE(review): shadows the outer d/off and is re-derived from the same node
+		_localDepthAndOffset( node , d , off );
+		Real temp = (Real)F.ccIntegrate( off , off ) + pointValues.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+		// The center (diagonal) entry is written first, then every other valid neighbor.
+		row[count++] = MatrixEntry< Real >( nodeIndex-offset , temp );
+		LocalOffset _off;	// offset of the neighbor currently visited, maintained by the index-update lambda below
+		WindowLoop< Dim >::Run
+		(
+			ZeroUIntPack< Dim >() , OverlapSizes() ,
+			[&]( int d , int i ){ _off[d] = off[d] - (int)OverlapRadii::Values[d] + i; } ,
+			[&]( const FEMTreeNode* _node , Real pointValue )
+			{
+				if( node!=_node && FEMIntegrator::IsValidFEMNode( UIntPack< FEMSigs ... >() , d , _off ) )
+				{
+					Real temp = (Real)F.ccIntegrate( _off , off ) + pointValue;	// computed before the node validity test on the next line
+					if( _isValidFEM1Node( _node ) ) row[count++] = MatrixEntry< Real >( _node->nodeData.nodeIndex-offset , temp );
+				}
+			} ,
+			neighbors.neighbors() , pointValues()
+		);
+	}
+	return constraint;
+}
+
+template< unsigned int Dim , class Real >	// Disabled implementation: as compiled (`#if 1`), the body consists solely of ERROR_OUT( "Broken code" ).
+template< unsigned int ... FEMSigs , typename T , unsigned int PointD >	// The former implementation is kept under `#else` for reference and is not compiled.
+void FEMTree< Dim , Real >::_addProlongedPointValues( UIntPack< FEMSigs ... > , WindowSlice< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* interpolationInfo ) const
+{
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] This code is broken" )
+#endif // SHOW_WARNINGS
+#if 1
+	ERROR_OUT( "Broken code" );	// reached even when interpolationInfo is null: the null check lives in the disabled branch
+#else
+	if( !interpolationInfo ) return;
+	const InterpolationInfo< T , PointD >& iInfo = *interpolationInfo;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	// NOTE(review): the names FEMSig, FEMDegree, SupportSize, OverlapRadius, LeftSupportRadius and LeftPointSupportRadius used below are not declared in this function.
+	const FEMTreeNode* node = neighbors.neighbors.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+
+	LocalDepth d , parentD ; LocalOffset off , parentOff;
+	_localDepthAndOffset( node , d , off );
+	_localDepthAndOffset( node->parent , parentD , parentOff );
+	int fStart , fEnd;
+	BSplineData< FEMSig >::FunctionSpan( d , fStart , fEnd );
+
+	int fIdx[Dim];
+	functionIndex( IsotropicUIntPack< Dim , FEMSig >() , node , fIdx );
+	double       splineValues[ Dim ]               [ PointD+1 ];
+	double parentSplineValues[ Dim ][ SupportSize ][ PointD+1 ];
+	int s[Dim];
+	CumulativeDerivativeValues< Real , Dim , PointD > dualValues;
+	std::function< void ( const FEMTreeNode* , Real& ) > innerFunction = [&]( const FEMTreeNode* pNode , Real& pointValue )
+	{
+		if( _isValidFEM1Node( pNode ) )
+		{
+			CumulativeDerivativeValues< Real , Dim , PointD > values = Evaluate< SupportSize , Dim , Real , PointD >( s , parentSplineValues );
+			pointValue += CumulativeDerivativeValues< Real , Dim , PointD >::Dot( dualValues , values );
+		};
+	};
+	std::function< void ( const FEMTreeNode* ) > outerFunction = [&]( const FEMTreeNode* _node )
+	{
+		if( _isValidSpaceNode( _node ) ) for( const PointData< Dim , Real , T , PointD >* _pData=iInfo.begin( _node ) ; _pData!=iInfo.end( _node ) ; _pData++ )
+		{
+			// Evaluate the node's basis function at the sample
+			const PointData< Dim , Real , T , PointD >& pData = *_pData;
+			_setDValues< FEMSig , PointD , FEMDegree >( pData.position , _node , node , bsData , splineValues );
+			_setDValues< FEMSig , PointD , FEMDegree >( pData.position , _node->parent , bsData , parentSplineValues );
+			dualValues = iInfo.weights * Evaluate< Dim , Real , PointD >( splineValues ) * pData.weight;
+
+			// Get the indices of the parent
+			LocalDepth _parentD ; LocalOffset _parentOff;
+			_localDepthAndOffset( _node->parent , _parentD , _parentOff );
+
+			int _off[Dim];	// offset of this point node's parent relative to the center node's parent
+			for( int dd=0 ; dd<Dim ; dd++ ) _off[dd] = _parentOff[dd] - parentOff[dd];
+
+			int _start[Dim] , _end[Dim];
+			for( int dd=0 ; dd<Dim ; dd++ ) _start[dd] = OverlapRadius + _off[dd] - LeftPointSupportRadius , _end[dd] = _start[dd] + SupportSize;
+			WindowLoop< Dim >::Run
+			(
+				_start , _end ,
+				[&]( int d , int i ){ s[d] = i + LeftPointSupportRadius - _off[d] - OverlapRadius; } ,
+				innerFunction ,
+				pNeighbors.neighbors() , pointValues
+			);
+		}
+	};
+	int start[Dim] , end[Dim];
+	for( int dd=0 ; dd<Dim ; dd++ ) start[dd] = OverlapRadius - LeftSupportRadius , end[dd] = start[dd] + SupportSize;
+	WindowLoop< Dim >::Run
+	(
+		start , end ,
+		[&]( int , int ){;} ,
+		outerFunction ,
+		neighbors.neighbors()
+	);
+#endif
+}
+
+template< unsigned int Dim , class Real >	// Fills `row` with the entries coupling the center node of `neighbors` to the valid nodes among its parent's neighbors (`pNeighbors`) inside the parent-overlap bounds.
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >	// Column indices are node indices minus `offset`; returns the number of entries written.
+int FEMTree< Dim , Real >::_setProlongedMatrixRow( const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , Pointer( MatrixEntry< Real > ) row , int offset , const DynamicWindow< double , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& stencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	// NOTE(review): node->parent is dereferenced without a null check, so callers presumably never pass a parentless node -- confirm.
+	int count = 0;
+	const FEMTreeNode* node = neighbors.neighbors.data[ WindowIndex< OverlapSizes , OverlapRadii >::Index ];
+	LocalDepth d , parentD ; LocalOffset off , parentOff;
+	_localDepthAndOffset( node , d , off );
+	_localDepthAndOffset( node->parent , parentD , parentOff );	// parentD/parentOff are not read afterwards in this function
+	bool isInterior = _isInteriorlyOverlapped( FEMDegrees() , FEMDegrees() , node->parent );
+	StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues;
+	memset( pointValues.data , 0 , sizeof(Real)*WindowSize< OverlapSizes >::Size );
+	_addProlongedPointValues( UIntPack< FEMSigs ... >() , pointValues() , neighbors , pNeighbors , bsData , interpolationInfo ... );
+	// NOTE(review): the single-info overload of _addProlongedPointValues defined above unconditionally calls ERROR_OUT -- confirm which overload the call above resolves to.
+	int nodeIndex = node->nodeData.nodeIndex;	// not read afterwards in this function
+	// Restrict the traversal of the parent's neighbors to the bounds computed for this child.
+	int start[Dim] , end[Dim];
+	_SetParentOverlapBounds( FEMDegrees() , FEMDegrees() , node , start , end );
+	if( isInterior ) // General case, so try to make fast
+	{
+		WindowLoop< Dim >::Run
+		(
+			start , end ,
+			[&]( int , int ){;} ,
+			[&]( const FEMTreeNode* node , const Real& pointValue , const Real& stencilValue )
+			{
+				if( _isValidFEM1Node( node ) ) row[count++] = MatrixEntry< Real >( node->nodeData.nodeIndex - offset , pointValue + stencilValue );	// `node` here is the lambda parameter (a parent neighbor), shadowing the center node
+			} ,
+			pNeighbors.neighbors() , pointValues() , stencil()
+		);
+	}
+	else
+	{
+		LocalDepth d ; LocalOffset off;	// NOTE(review): shadows the outer d/off and is re-derived from the same node
+		_localDepthAndOffset( node , d , off );
+		WindowLoop< Dim >::Run
+		(
+			start , end , 
+			[&]( int , int ){;} ,
+			[&]( const FEMTreeNode* node , const Real& pointValue )
+			{
+				if( _isValidFEM1Node( node ) )
+				{
+					LocalDepth d ; LocalOffset _off;
+					_localDepthAndOffset( node , d , _off );
+					row[count++] = MatrixEntry< Real >( node->nodeData.nodeIndex - offset , (Real)F.pcIntegrate( _off , off ) + pointValue );	// _off: parent neighbor, off: center (child) node
+				}
+			} ,
+			pNeighbors.neighbors() , pointValues()
+		);
+	}
+	return count;
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int FEMDegree1 , unsigned int FEMDegree2 >
+void FEMTree< Dim , Real >::_SetParentOverlapBounds( const FEMTreeNode* node , int start[Dim] , int end[Dim] )
+{
+	// A node without a parent has no parent-overlap bounds: start/end are left untouched.
+	if( !node->parent ) return;
+	typedef BSplineOverlapSizes< FEMDegree1 , FEMDegree2 > Overlap;
+	// Position of the node among its parent's children; bit `axis` selects the table entry used along that axis.
+	const int childIndex = (int)( node - node->parent->children );
+	for( int axis=0 ; axis<Dim ; axis++ )
+	{
+		const int half = ( childIndex>>axis ) & 1;
+		start[axis] = Overlap::ParentOverlapStart[ half ] - (int)Overlap::OverlapStart;
+		end  [axis] = Overlap::ParentOverlapEnd  [ half ] - (int)Overlap::OverlapStart + 1;
+	}
+}
+template< unsigned int Dim , class Real >
+template< unsigned int FEMDegree1 , unsigned int FEMDegree2 >
+void FEMTree< Dim , Real >::_SetParentOverlapBounds( int cIdx , int start[Dim] , int end[Dim] )
+{
+	// Bit `axis` of cIdx selects the table entry used along that axis; both bounds are shifted by -OverlapStart and `end` is one past ParentOverlapEnd.
+	typedef BSplineOverlapSizes< FEMDegree1 , FEMDegree2 > Overlap;
+	for( int axis=0 ; axis<Dim ; axis++ )
+	{
+		const int half = ( cIdx>>axis ) & 1;
+		start[axis] = Overlap::ParentOverlapStart[ half ] - (int)Overlap::OverlapStart , end[axis] = Overlap::ParentOverlapEnd[ half ] - (int)Overlap::OverlapStart + 1;
+	}
+}
+
+template< unsigned int Dim , class Real >	// Accumulates, over the points stored with the neighbors of the center inside the support window, the products dualValues[k] * values[k], where `values` comes from peState.dValues at `node`'s offset.
+template< unsigned int ... FEMSigs , typename T , unsigned int PointD >	// Returns T() when interpolationInfo is null and a value-initialized T when `node` is not a valid FEM node.
+T FEMTree< Dim , Real >::_getInterpolationConstraintFromProlongedSolution( const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const FEMTreeNode* node , ConstPointer( T ) prolongedSolution , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* interpolationInfo ) const
+{
+	if( !interpolationInfo ) return T();	// NOTE(review): `prolongedSolution` is not read anywhere in this function; only the stored per-point dualValues are used
+	typedef UIntPack<    BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize    ... > SupportSizes;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack< ( -BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ... > LeftSupportRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	typedef PointEvaluatorState< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > _PointEvaluatorState;
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( node , d , off );
+	T temp = {};	// running sum that is returned
+	if( _isValidFEM1Node( node ) )
+	{
+		int s[Dim];	// written by the index-update lambda but not read in this function
+#if defined( _WIN32 ) || defined( _WIN64 )
+#pragma message ( "[WARNING] You've got me MSVC" )
+		auto  UpdateFunction = [&]( int d , int i ){ s[d] = (int)SupportSizes::Values[d] - 1 - ( i - (int)OverlapRadii::Values[d] + (int)LeftSupportRadii::Values[d] ); };	// Windows builds name the two lambdas first; other builds pass the same lambdas inline in the Run call below
+		auto ProcessFunction = [&]( const FEMTreeNode* pNode )
+		{
+			if( _isValidSpaceNode( pNode ) )
+			{
+				size_t begin , end;
+				interpolationInfo->range( pNode , begin , end );
+				for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+				{
+					const DualPointInfo< Dim , Real , T , PointD > _pData = (*interpolationInfo)[ pIndex ];
+					_PointEvaluatorState peState;
+					Point< Real , Dim > p = _pData.position;
+					LocalDepth pD ; LocalOffset pOff;
+					_localDepthAndOffset( pNode , pD , pOff );
+					bsData.initEvaluationState( p , pD , pOff , peState );	// evaluation state for the point, set up relative to the node that holds it
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] Why is this necessary?" )
+#endif // SHOW_WARNINGS
+					const int *_off = off;
+					CumulativeDerivativeValues< Real , Dim , PointD > values = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( _off );
+					for( int d=0 ; d<CumulativeDerivatives< Dim , PointD >::Size ; d++ ) temp += _pData.dualValues[d] * values[d];
+				}
+			}
+		};
+#endif // _WIN32 || _WIN64
+		WindowLoop< Dim >::Run
+		(
+			OverlapRadii() - LeftSupportRadii() , OverlapRadii() - LeftSupportRadii() + SupportSizes() ,
+#if defined( _WIN32 ) || defined( _WIN64 )
+			UpdateFunction , ProcessFunction ,
+#else // !_WIN32 && !_WIN64
+			[&]( int d , int i ){ s[d] = (int)SupportSizes::Values[d] - 1 - ( i - (int)OverlapRadii::Values[d] + (int)LeftSupportRadii::Values[d] ); } ,
+			[&]( const FEMTreeNode* pNode )
+			{
+				if( _isValidSpaceNode( pNode ) )
+				{
+					size_t begin , end;
+					interpolationInfo->range( pNode , begin , end );
+					for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+					{
+						const DualPointInfo< Dim , Real , T , PointD > _pData = (*interpolationInfo)[ pIndex ];
+						_PointEvaluatorState peState;
+						Point< Real , Dim > p = _pData.position;
+						LocalDepth pD ; LocalOffset pOff;
+						_localDepthAndOffset( pNode , pD , pOff );
+						bsData.initEvaluationState( p , pD , pOff , peState );	// evaluation state for the point, set up relative to the node that holds it
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] Why is this necessary?" )
+#endif // SHOW_WARNINGS
+						const int *_off = off;
+						CumulativeDerivativeValues< Real , Dim , PointD > values = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( _off );
+						for( int d=0 ; d<CumulativeDerivatives< Dim , PointD >::Size ; d++ ) temp += _pData.dualValues[d] * values[d];
+					}
+				}
+			} ,
+#endif // _WIN32 || _WIN64
+			neighbors.neighbors()
+		);
+	}
+	return temp;
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , unsigned int ... PointDs >
+T FEMTree< Dim , Real >::_getConstraintFromProlongedSolution( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const FEMTreeNode* node , ConstPointer( T ) prolongedSolution , const DynamicWindow< double , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& stencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	// Returns the sum over the valid parent neighbors of prolongedSolution[ neighbor ] * coefficient, plus the interpolation term; returns T() at local depth <= 0.
+	if( _localDepth( node )<=0 ) return T();
+	// This is a conservative estimate as we only need to make sure that the parent nodes don't overlap the child (not the parent itself)
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( node->parent , d , off );
+	bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( FEMDegrees() , FEMDegrees() , d , off );
+	// loopData is built once (function-local static) from BaseFEMIntegrator::ParentOverlapBounds and supplies, per child index, the list of window indices to visit.
+	// Offset the constraints using the solution from lower resolutions.
+	T constraint = {};
+	static const WindowLoopData< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > loopData( []( int c , int* start , int* end ){ BaseFEMIntegrator::ParentOverlapBounds( FEMDegrees() , FEMDegrees() , c , start , end ); } );
+	int cIdx = (int)( node - node->parent->children );	// position of the node among its parent's children
+	unsigned int size = loopData.size[cIdx];
+	const unsigned int* indices = loopData.indices[cIdx];
+	ConstPointer( double ) values = stencil.data;
+	ConstPointer( FEMTreeNode * const ) nodes = pNeighbors.neighbors().data;
+	if( isInterior )	// the coefficient is read directly from the supplied stencil
+	{
+		for( unsigned int i=0 ; i<size ; i++ )
+		{
+			unsigned int idx = indices[i];
+			if( _isValidFEM1Node( nodes[idx] ) ) constraint += (T)( prolongedSolution[ nodes[idx]->nodeData.nodeIndex ] * (Real)values[idx] );
+		}
+	}
+	else	// the coefficient is computed per neighbor with F.pcIntegrate
+	{
+		LocalDepth d ; LocalOffset off;	// shadows the parent's d/off above: here they describe `node` itself
+		_localDepthAndOffset( node , d , off );
+		for( unsigned int i=0 ; i<size ; i++ )
+		{
+			unsigned int idx = indices[i];
+			if( _isValidFEM1Node( nodes[idx] ) )
+			{
+				LocalDepth _d ; LocalOffset _off;
+				_localDepthAndOffset( nodes[idx] , _d , _off );
+				constraint += (T)( prolongedSolution[ nodes[idx]->nodeData.nodeIndex ] * (Real)F.pcIntegrate( _off , off ) );
+			}
+
+		}
+	}
+	return constraint + _getInterpolationConstraintFromProlongedSolution( neighbors , node , prolongedSolution , bsData , interpolationInfo... );
+}
+
+// Given the solution @( highDepth ) add to the met constraints @( highDepth-1 ): each valid node's solution value, scaled by the parent/child coefficient, is atomically added to the entries of its parent's valid neighbors.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T >
+void FEMTree< Dim , Real >::_updateRestrictedIntegralConstraints( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth highDepth , ConstPointer( T ) fineSolution , Pointer( T ) restrictedConstraints ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+	// Nothing to do at (or below) local depth 0.
+	if( highDepth<=0 ) return;
+	// Get the stencil describing the Laplacian relating coefficients @(highDepth) with coefficients @(highDepth-1)
+	PCStencils< FEMDegrees > stencils;
+	F.template setStencils< true >(  stencils );
+	size_t start = _sNodesBegin(highDepth) , end = _sNodesEnd(highDepth) , range = end-start;	// NOTE(review): start/end/range are not read below
+	int lStart = _sNodesBegin(highDepth-1);	// NOTE(review): lStart is not read below
+	// One neighbor key per thread, each indexed by omp_get_thread_num() inside the parallel loop.
+	// Iterate over the nodes @(highDepth)
+	std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( highDepth )-1 );
+#pragma omp parallel for
+	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		FEMTreeNode* node = _sNodes.treeNodes[i];
+
+		// Offset the coarser constraints using the solution from the current resolutions.
+		int cIdx = (int)( node - node->parent->children );
+
+		{
+			typename FEMTreeNode::template ConstNeighbors< OverlapSizes > pNeighbors;
+			neighborKey.getNeighbors( OverlapRadii() , OverlapRadii() , node->parent , pNeighbors );
+			const DynamicWindow< double , OverlapSizes >& stencil = stencils.data[cIdx];
+			// Interior parents use the precomputed stencil; otherwise each coefficient comes from F.pcIntegrate.
+			bool isInterior = _isInteriorlyOverlapped( FEMDegrees() , FEMDegrees() , node->parent );
+			LocalDepth d ; LocalOffset off;
+			_localDepthAndOffset( node , d , off );
+
+			// Offset the constraints using the solution from finer resolutions.
+			int start[Dim] , end[Dim];
+			_SetParentOverlapBounds( FEMDegrees() , FEMDegrees() , node , start , end );
+			// NOTE(review): start/end are filled in above but the loops below visit every window entry.
+			T solution = fineSolution[ node->nodeData.nodeIndex ];
+			ConstPointer( FEMTreeNode * const ) nodes = pNeighbors.neighbors().data;
+			ConstPointer( double ) stencilValues = stencil.data;
+			if( isInterior )
+			{
+				for( int i=0 ; i<WindowSize< OverlapSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+					AddAtomic( restrictedConstraints[ nodes[i]->nodeData.nodeIndex ] , solution * (Real)stencilValues[i] );	// atomic: the target entry is indexed by the neighbor, not by the loop's own node
+			}
+			else
+			{
+				for( int i=0 ; i<WindowSize< OverlapSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+				{
+					LocalDepth _d ; LocalOffset _off;
+					_localDepthAndOffset( nodes[i] , _d , _off );
+					AddAtomic( restrictedConstraints[ nodes[i]->nodeData.nodeIndex ] , solution * (Real)F.pcIntegrate( _off , off ) );
+				}
+			}
+		}
+	}
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+void FEMTree< Dim , Real >::_setPointValuesFromProlongedSolution( LocalDepth highDepth , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , InterpolationInfo< T , PointD >* iInfo ) const
+{
+	if( !iInfo ) return;	// no interpolation info: nothing to update
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	InterpolationInfo< T , PointD >& interpolationInfo = *iInfo;
+	// Overwrites dualValues of every point stored with a valid node @( highDepth ), using _coarserFunctionValues of prolongedSolution at the point.
+	LocalDepth lowDepth = highDepth-1;
+	if( lowDepth<0 ) return;
+	// For every node at the current depth
+	std::vector< ConstPointSupportKey< FEMDegrees > > neighborKeys( omp_get_max_threads() );
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
+	// One neighbor key per thread, each indexed by omp_get_thread_num() inside the parallel loop.
+#pragma omp parallel for
+	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		ConstPointSupportKey< FEMDegrees >& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
+		{
+			size_t begin , end;
+			interpolationInfo.range( _sNodes.treeNodes[i] , begin , end );
+			for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+			{
+				DualPointInfo< Dim , Real , T , PointD >& pData = interpolationInfo[ pIndex ];
+				neighborKey.getNeighbors( _sNodes.treeNodes[i]->parent );	// the key is positioned on the parent before the coarser values are evaluated
+#ifdef _MSC_VER
+				pData.dualValues = interpolationInfo( pIndex , _coarserFunctionValues< PointD , T , FEMSigs ... >( UIntPack< FEMSigs ... >() , pData.position , neighborKey , _sNodes.treeNodes[i] , bsData , prolongedSolution ) ) * pData.weight;	// same call as the #else branch, with every template argument spelled out
+#else // !_MSC_VER
+				pData.dualValues = interpolationInfo( pIndex , _coarserFunctionValues< PointD >( UIntPack< FEMSigs ... >() , pData.position , neighborKey , _sNodes.treeNodes[i] , bsData , prolongedSolution ) ) * pData.weight;
+#endif // _MSC_VER
+			}
+		}
+	}
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+void FEMTree< Dim , Real >::_updateRestrictedInterpolationConstraints( const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth highDepth , ConstPointer( T ) solution , Pointer( T ) restrictedConstraints , const InterpolationInfo< T , PointD >* iInfo ) const
+{
+	if( !iInfo ) return;
+	const InterpolationInfo< T , PointD >& interpolationInfo = *iInfo;
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+	// Atomically adds to restrictedConstraints @( highDepth-1 ) the interpolation contribution of `solution`; does nothing when iInfo is null or highDepth<1.
+	// Note: We can't iterate over the finer point nodes as the point weights might be
+	// scaled incorrectly, due to the adaptive exponent. So instead, we will iterate
+	// over the coarser nodes and evaluate the finer solution at the associated points.
+	LocalDepth  lowDepth = highDepth-1;
+	if( lowDepth<0 ) return;
+
+	// One neighbor key per thread, each indexed by omp_get_thread_num() inside the parallel loop.
+	std::vector< ConstPointSupportKey< FEMDegrees > > neighborKeys( omp_get_max_threads() );
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
+#pragma omp parallel for
+	for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
+	{
+		ConstPointSupportKey< FEMDegrees >& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		PointEvaluatorState< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > peState;
+		const FEMTreeNode* node = _sNodes.treeNodes[i];
+
+		LocalDepth d ; LocalOffset off;
+		_localDepthAndOffset( node , d , off );
+		typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors = neighborKey.getNeighbors( node );
+		size_t begin , end;
+		interpolationInfo.range( node , begin , end );
+		for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+		{
+			const DualPointInfo< Dim , Real , T , PointD >& pData = interpolationInfo[ pIndex ];
+			Point< Real , Dim > p = pData.position;
+			bsData.initEvaluationState( p , d , off , peState );
+
+#ifdef _MSC_VER
+			CumulativeDerivativeValues< T , Dim , PointD > dualValues = interpolationInfo( pIndex , _finerFunctionValues< PointD , T , FEMSigs ... >( UIntPack< FEMSigs ... >() , pData.position , neighborKey , node , bsData , solution ) ) * pData.weight;
+#else // !_MSC_VER
+			CumulativeDerivativeValues< T , Dim , PointD > dualValues = interpolationInfo( pIndex , _finerFunctionValues< PointD >( UIntPack< FEMSigs ... >() , pData.position , neighborKey , node , bsData , solution ) ) * pData.weight;
+#endif // _MSC_VER
+			// Update constraints for all nodes @( depth-1 ) that overlap the point
+			// The window indices themselves are not needed here, so the index-update callback is a no-op.
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , SupportSizes() ,
+				[&]( int , int ){;} ,
+				[&]( const FEMTreeNode* node )
+				{
+					if( _isValidFEM1Node( node ) )
+					{
+						LocalDepth d ; LocalOffset off;
+						_localDepthAndOffset( node , d , off );
+						CumulativeDerivativeValues< Real , Dim , PointD > values = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( off );
+						T temp = {};
+						for( int d=0 ; d<CumulativeDerivatives< Dim , PointD >::Size ; d++ ) temp += dualValues[d] * values[d];
+						AddAtomic( restrictedConstraints[ node->nodeData.nodeIndex ] , temp );	// atomic: the target entry is indexed by the neighbor, not by the loop's own node
+					}
+				} ,
+				neighbors.neighbors()
+			);
+		}
+	}
+}
+// Returns a dense copy of `coefficients` for the nodes below _sNodesEnd(_maxDepth-1), after passing it through _upSample for depths 1.._maxDepth-1 (presumably accumulating each coarser level into the next finer one -- confirm against _upSample).
+template< unsigned int Dim , class Real >
+template< class C , unsigned int ... FEMSigs >
+DenseNodeData< C , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::coarseCoefficients( const DenseNodeData< C , UIntPack< FEMSigs ... > >& coefficients ) const
+{
+	DenseNodeData< C , UIntPack< FEMSigs ... > > coarseCoefficients( _sNodesEnd(_maxDepth-1) );
+	memset( coarseCoefficients() , 0 , sizeof(C)*_sNodesEnd(_maxDepth-1) );	// Elements are of type C (not Real): size the clear by sizeof(C) so the whole buffer is zeroed
+#pragma omp parallel for
+	for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth-1) ; i++ ) coarseCoefficients[i] = coefficients[i];	// Entries before _sNodesBegin(0) keep the zero fill
+	typename FEMIntegrator::template RestrictionProlongation< UIntPack< FEMSigs ... > > rp;
+	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample( UIntPack< FEMSigs ... >() , rp , d , coarseCoefficients() );	// Processed in coarse-to-fine order
+	return coarseCoefficients;
+}
+// Sparse-input overload: builds a zero-filled dense buffer for the nodes below _sNodesEnd(_maxDepth-1), copies in every coefficient the sparse container actually stores, then passes the buffer through _upSample for depths 1.._maxDepth-1.
+template< unsigned int Dim , class Real >
+template< class C , unsigned int ... FEMSigs >
+DenseNodeData< C , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::coarseCoefficients( const SparseNodeData< C , UIntPack< FEMSigs ... > >& coefficients ) const
+{
+	const auto coarseEnd = _sNodesEnd(_maxDepth-1);
+	DenseNodeData< C , UIntPack< FEMSigs ... > > dense( coarseEnd );
+	memset( dense() , 0 , sizeof(C)*coarseEnd );
+#pragma omp parallel for
+	for( int n=_sNodesBegin(0) ; n<coarseEnd ; n++ )
+	{
+		if( const C* stored = coefficients( _sNodes.treeNodes[n] ) ) dense[n] = *stored;	// Nodes without a stored coefficient stay zero
+	}
+	typename FEMIntegrator::template RestrictionProlongation< UIntPack< FEMSigs ... > > prolongation;
+	for( LocalDepth depth=1 ; depth<_maxDepth ; depth++ ) _upSample( UIntPack< FEMSigs ... >() , prolongation , depth , dense() );
+	return dense;
+}
+// Evaluates at point p the cumulative derivative values (up to order PointD) of `solution`, using only the basis functions one level coarser than pointNode (its parent's level) whose neighbors are cached in neighborKey.
+template< unsigned int Dim , class Real >
+template< unsigned int PointD , typename T , unsigned int ... FEMSigs >
+CumulativeDerivativeValues< T , Dim , PointD > FEMTree< Dim , Real >::_coarserFunctionValues( UIntPack< FEMSigs ... > , Point< Real , Dim > p , const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* pointNode , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) solution ) const
+{
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+	// NOTE(review): the accumulation below relies on `values` starting at zero -- presumably guaranteed by the default constructor; confirm.
+	CumulativeDerivativeValues< T , Dim , PointD > values;
+	LocalDepth depth = _localDepth( pointNode );
+	if( depth<0 ) return values;	// Nodes at negative local depth contribute nothing
+	// Iterate over all basis functions that overlap the point at the coarser resolutions
+	{
+		// The evaluation state is set up relative to the parent's depth and offset.
+		PointEvaluatorState< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > peState;
+		LocalDepth _d ; LocalOffset _off;
+		_localDepthAndOffset( pointNode->parent , _d , _off );
+		bsData.initEvaluationState( p , _d , _off , peState );
+		const typename FEMTreeNode::template ConstNeighbors< SupportSizes >& neighbors = neighborKey.neighbors[ _localToGlobal( depth-1 ) ];	// Read-only: the key must already hold the neighbors for this node (no getNeighbors call is made here)
+		ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+		// Accumulate solution-coefficient times basis value/derivative over the valid neighbors.
+		for( unsigned int i=0 ; i<WindowSize< SupportSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+		{
+			LocalDepth d ; LocalOffset off;
+			_localDepthAndOffset( nodes[i] , d , off );
+			CumulativeDerivativeValues< Real , Dim , PointD > temp = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( off );
+			const T& _solution = solution[ nodes[i]->nodeData.nodeIndex ];
+			for( int s=0 ; s<CumulativeDerivatives< Dim , PointD >::Size ; s++ ) values[s] += _solution * temp[s];
+		}
+	}
+	return values;
+}
+// Evaluates at point p the cumulative derivative values (up to order PointD) of `solution`, using the basis functions one level finer than pointNode: those of the child cell of pointNode selected by _childIndex( pointNode , p ) and that child's neighbors.
+template< unsigned int Dim , class Real >
+template< unsigned int PointD , typename T , unsigned int ... FEMSigs >
+CumulativeDerivativeValues< T , Dim , PointD > FEMTree< Dim , Real >::_finerFunctionValues( UIntPack< FEMSigs ... > , Point< Real , Dim > p , const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* pointNode , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) solution ) const
+{
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > SupportSizes;
+	typename FEMTreeNode::template ConstNeighbors< SupportSizes > childNeighbors;
+	// NOTE(review): the accumulation below relies on `values` starting at zero -- presumably guaranteed by the default constructor; confirm.
+	CumulativeDerivativeValues< T , Dim , PointD > values;
+	LocalDepth depth = _localDepth( pointNode );
+	int cIdx = _childIndex( pointNode , p );	// Computed once; used both for the neighbor lookup and for the child offset below
+	neighborKey.getChildNeighbors( cIdx , _localToGlobal( depth ) , childNeighbors );
+	PointEvaluatorState< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > peState;
+	LocalDepth d ; LocalOffset off;
+	_localDepthAndOffset( pointNode , d , off );
+	d++;	// Move the depth/offset from pointNode to its child containing p
+	for( int dd=0 ; dd<Dim ; dd++ ) off[dd] = (off[dd]<<1) | ( (cIdx>>dd) & 1 );
+	bsData.initEvaluationState( p , d , off , peState );
+	// The per-dimension window index is not needed here, so the index-update callback is a no-op.
+	WindowLoop< Dim >::Run
+	(
+		ZeroUIntPack< Dim >() , SupportSizes() ,
+		[]( int , int ){} ,
+		[&]( const FEMTreeNode* node )
+		{
+			if( _isValidFEM1Node( node ) )
+			{
+				LocalDepth _d ; LocalOffset _off;
+				_localDepthAndOffset( node , _d , _off );
+				CumulativeDerivativeValues< Real , Dim , PointD > dValues = peState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( _off );
+				const T& _solution = solution[ node->nodeData.nodeIndex ];
+				for( int s=0 ; s<CumulativeDerivatives< Dim , PointD >::Size ; s++ ) values[s] += _solution * dValues[s];
+			}
+		} ,
+		childNeighbors.neighbors()
+	);
+	return values;
+}
+// Fills, in parallel, one row of `matrix` per node in [nBegin,nEnd) at `depth`. If `constraints` is non-null, entry i receives the value returned by _setMatrixRowAndGetConstraintFromProlongation (T() for nodes failing _isValidFEM1Node). If `diagonalR` is non-null, entry i receives the reciprocal of the row's first entry. Always returns 1.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , unsigned int ... PointDs >
+int FEMTree< Dim , Real >::_getSliceMatrixAndProlongationConstraints( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size >& matrix , Pointer( Real ) diagonalR , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , int nBegin , int nEnd , ConstPointer( T ) prolongedSolution , Pointer( T ) constraints , const CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > OverlapSizes;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	size_t range = nEnd - nBegin;
+	matrix.resize( (int)range );
+	std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
+#pragma omp parallel for
+	for( int i=0 ; i<(int)range ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i+nBegin] ) )
+	{
+		ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		FEMTreeNode* node = _sNodes.treeNodes[i+nBegin];
+		// Gather the overlapping neighbors of the node (neighbors) and of its parent (pNeighbors)
+		typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors , pNeighbors;
+		neighborKey.getNeighbors( OverlapRadii() , OverlapRadii() , node , pNeighbors , neighbors );
+		// Set the row entries
+		if( constraints ) constraints[i] = _setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... >() , F , pNeighbors , neighbors , i , matrix , nBegin , pcStencils , ccStencil , bsData , prolongedSolution , interpolationInfo... );
+		else                               _setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... >() , F , pNeighbors , neighbors , i , matrix , nBegin , pcStencils , ccStencil , bsData , prolongedSolution , interpolationInfo... );
+		if( diagonalR ) diagonalR[i] = (Real)1. / matrix[i][0].Value;	// Assumes the first entry of the row is the diagonal -- TODO confirm against _setMatrixRowAndGetConstraintFromProlongation
+	}
+	else if( constraints ) constraints[i] = T();	// Pairs with the validity test in the loop header: non-valid nodes get a value-initialized constraint
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] Why do we care if the node is not valid?" )
+#endif // SHOW_WARNINGS
+#if !defined( _WIN32 ) && !defined( _WIN64 )
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] I'm not sure how expensive this system call is on non-Windows system. (You may want to comment this out.)" )
+#endif // SHOW_WARNINGS
+#endif // !_WIN32 && !_WIN64
+	MemoryUsage();
+	return 1;
+}
+// Builds the sparse system matrix for the nodes at `depth`: one row per node in that depth's range (row index relative to _sNodesBegin(depth)), filled only for nodes passing _isValidFEM1Node. Calls ERROR_OUT if depth is outside [0,_maxDepth].
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+SparseMatrix< Real , int > FEMTree< Dim , Real >::systemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth depth , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typedef typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > > BaseSystem;
+	if( depth<0 || depth>_maxDepth ) ERROR_OUT( "System depth out of bounds: %d <= %d <= %d" , 0 , depth , _maxDepth );
+	SparseMatrix< Real , int > matrix;
+	F.init( depth );
+	PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > bsData( depth );
+
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+
+	CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > > stencil;
+	PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > > stencils;	// Passed through below without being set in this function
+
+	F.template setStencil< false >( stencil );
+
+	matrix.resize( _sNodesSize(depth) );
+	std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
+#pragma omp parallel for
+	for( int i=_sNodesBegin(depth) ; i<_sNodesEnd( depth ) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		int ii = i - _sNodesBegin(depth);	// Row index relative to the start of this depth
+		ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+		neighborKey.getNeighbors( OverlapRadii() , OverlapRadii() , _sNodes.treeNodes[i] , neighbors );
+		// The two branches call _getMatrixRowSize with different spellings of the template arguments, selected by GCC version
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+		matrix.setRowSize( ii , _getMatrixRowSize( UIntPack< FEMSigs ... >() , neighbors ) );
+#else // !__GNUC__ || __GNUC__ >=5
+		matrix.setRowSize( ii , _getMatrixRowSize< FEMSigs ... >( neighbors ) );
+#endif // __GNUC__ && __GNUC__ < 5
+		_setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... >() , F ,  neighbors , neighbors , matrix[ii] , _sNodesBegin(depth) , stencils , stencil , bsData , ( ConstPointer( T ) )NullPointer( T ) , interpolationInfo ... );	// Null prolonged solution; the returned constraint is discarded
+	}
+	return matrix;
+}
+// Builds the sparse matrix whose rows correspond to the nodes at highDepth (row index relative to _sNodesBegin(highDepth)) and whose entries are set by _setProlongedMatrixRow from the parent's neighbors at highDepth-1. Calls ERROR_OUT if highDepth is outside (0,_maxDepth].
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+SparseMatrix< Real , int > FEMTree< Dim , Real >::prolongedSystemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack<FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth highDepth , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	if( highDepth<=0 || highDepth>_maxDepth ) ERROR_OUT( "System depth out of bounds: %d < %d <= %d" , 0 , highDepth , _maxDepth );
+
+	LocalDepth lowDepth = highDepth-1;
+	SparseMatrix< Real , int > matrix;
+	F.init( highDepth );
+	PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > bsData( highDepth );
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapStart ) ... > OverlapRadii;
+	typedef UIntPack<    BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize    ... > OverlapSizes;
+
+	PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > > stencils;
+	F.template setStencils< true >( stencils );
+
+	matrix.resize( _sNodesSize(highDepth) );
+	std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( highDepth ) );
+#pragma omp parallel for
+	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		int ii = i - _sNodesBegin(highDepth);	// Row index relative to the start of highDepth
+		int cIdx = (int)( _sNodes.treeNodes[i]-_sNodes.treeNodes[i]->parent->children );	// Position of the node within its parent's children array; the parent is dereferenced unconditionally
+
+		ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors , pNeighbors;
+		neighborKey.getNeighbors( OverlapRadii() , OverlapRadii() , _sNodes.treeNodes[i] , neighbors );
+		neighborKey.getNeighbors( OverlapRadii() , OverlapRadii() , _sNodes.treeNodes[i]->parent , pNeighbors );
+		// The row is sized from the parent's neighbors and filled using the stencil selected by the child index.
+		matrix.setRowSize( ii , _getProlongedMatrixRowSize< FEMSigs ... >( _sNodes.treeNodes[i] , pNeighbors ) );
+		_setProlongedMatrixRow< Real , PointDs ... >( F , neighbors , pNeighbors , matrix[ii] , _sNodesBegin(lowDepth) , stencils.data[cIdx] , bsData , interpolationInfo... );
+	}
+	return matrix;
+}
+// Builds the sparse down-sampling matrix from highDepth to highDepth-1: one row per node at the lower depth (row index relative to _sNodesBegin(lowDepth)), with one entry per valid finer-level neighbor (column index relative to _sNodesBegin(highDepth)) weighted by the up-sampling coefficient. Returns an empty matrix if highDepth-1<0.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+SparseMatrix< Real , int > FEMTree< Dim , Real >::downSampleMatrix( UIntPack< FEMSigs ... > , LocalDepth highDepth ) const
+{
+	SparseMatrix< Real , int > matrix;
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... > UpSampleSizes;
+	typedef IntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart ... > UpSampleStarts;
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< -BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart ... > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleEnd ... > > UpSampleKey;
+
+	LocalDepth lowDepth = highDepth-1;
+	if( lowDepth<0 ) return matrix;	// Nothing to down-sample to; this return precedes the allocations below
+
+	matrix.resize( _sNodesSize( lowDepth ) );
+	// One heap-allocated evaluator per dimension; released at the end of the function.
+	typename EvaluationData::UpSampleEvaluator* upSampleEvaluators[] = { new typename BSplineEvaluationData< FEMSigs >::UpSampleEvaluator() ... };
+	for( int d=0 ; d<Dim ; d++ ) upSampleEvaluators[d]->set( lowDepth );
+	std::vector< UpSampleKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
+	// Precompute the tensor-product stencil at the center offset of the low-resolution grid; it is reused for every interior node.
+	DynamicWindow< double , UpSampleSizes > upSampleStencil;
+	int lowCenter = ( 1<<lowDepth )>>1;
+	double value[Dim+1] ; value[0] = 1;	// value[d+1] holds the running product over dimensions 0..d
+	WindowLoop< Dim >::Run
+	(
+		ZeroUIntPack< Dim >() , UpSampleSizes() ,
+		[&]( int d , int i ){ value[d+1] = value[d] * upSampleEvaluators[d]->value( lowCenter , 2*lowCenter + i + UpSampleStarts::Values[d] ); } ,
+		[&]( double&  stencilValue ){ stencilValue = value[Dim]; } ,
+		upSampleStencil()
+	);
+
+#pragma omp parallel for
+	for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		int _i = i - _sNodesBegin(lowDepth);	// Row index relative to the start of lowDepth
+		FEMTreeNode* pNode = _sNodes.treeNodes[i];
+
+		UpSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		LocalDepth d ; LocalOffset off;
+		_localDepthAndOffset( pNode , d , off );
+		neighborKey.getNeighbors( pNode );
+		typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... > > neighbors;
+		neighborKey.getChildNeighbors( 0 , _localToGlobal( d ) , neighbors );
+
+		// First pass: count the valid finer-level neighbors to size the row.
+		int rowSize = 0;
+		ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+		for( int i=0 ; i<WindowSize< UpSampleSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) ) rowSize++;
+
+		matrix.setRowSize( _i , rowSize );
+		matrix.rowSizes[_i] = 0;	// Reused as a fill cursor: incremented once per entry written in the second pass below
+
+		// We want to test whether the contained children are interior.
+		// This test is more conservative, because it checks that the overlapping children are interior.
+		bool isInterior = _isInteriorlyOverlapped( FEMDegrees() , FEMDegrees() , pNode );
+
+		if( isInterior )
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			ConstPointer( double ) stencilValues = upSampleStencil().data;
+			for( int i=0 ; i<WindowSize< UpSampleSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+				matrix[_i][ matrix.rowSizes[_i]++ ] = MatrixEntry< Real >( nodes[i]->nodeData.nodeIndex - _sNodesBegin(highDepth) , (Real)stencilValues[i] );
+		}
+		else
+		{
+			double upSampleValues[Dim][ UpSampleSizes::Max() ];
+			// Evaluate the per-dimension coefficients at this node's own offset (the precomputed stencil is not used in this branch).
+			WindowLoop< Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , UpSampleSizes() ,
+				[&]( int d , int i ){ upSampleValues[d][i] = upSampleEvaluators[d]->value( off[d] , 2*off[d] + i + UpSampleStarts::Values[d] ); } ,
+				[&]( void ){}
+			);
+			// Combine the per-dimension coefficients into tensor-product weights, one per valid neighbor.
+			double values[Dim+1] ; values[0] = 1;
+			WindowLoop< Dim , Dim >::Run
+			(
+				ZeroUIntPack< Dim >() , UpSampleSizes() ,
+				[&]( int d , int i ){ values[d+1] = values[d] * upSampleValues[d][i]; } ,
+				[&]( const FEMTreeNode* node ){ if( _isValidFEM1Node( node ) ) matrix[_i][ matrix.rowSizes[_i]++ ] = MatrixEntry< Real >( node->nodeData.nodeIndex - _sNodesBegin(highDepth) , (Real)values[Dim] ); } ,
+				neighbors.neighbors()
+			);
+		}
+	}
+	for( int d=0 ; d<Dim ; d++ ) delete upSampleEvaluators[d];
+	return matrix;
+}
+// Assembles a single sparse matrix over all nodes below _sNodesEnd(depth) from the per-depth system matrices, the parent/child (prolonged) system matrices and products with the up-sampling matrices. If nonRefinableOnly is set, rows of refinable nodes coarser than `depth` and of non-valid nodes are replaced by identity rows, and the entries referring to such nodes are dropped from the remaining rows.
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+SparseMatrix< Real , int > FEMTree< Dim , Real >::fullSystemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth depth , bool nonRefinableOnly , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	SparseMatrix< Real , int > M;
+	std::vector< SparseMatrix< Real , int > >                  systemMatrices( depth+1 );
+	std::vector< SparseMatrix< Real , int > >         prolongedSystemMatrices( depth   );
+	std::vector< std::vector< SparseMatrix< Real , int > > > upSampleMatrices( depth>1 ? depth-1 : 0 );	// Clamped: with depth==0 the count depth-1 would convert to a huge unsigned size
+	// upSampleMatrices[d1][d2] (d1<d2) is stored in global indexing, with rows at depth d2 and columns at depth d1.
+	for( int d=0 ; d<depth-1 ; d++ ) upSampleMatrices[d].resize( depth );
+	unsigned int size = _sNodesEnd( depth );
+	for( int d=0 ; d<=depth ; d++ )	// Same-depth blocks, shifted from per-depth to global indices
+	{
+		SparseMatrix< Real , int >& M = systemMatrices[d];
+		M.resize( size );
+		SparseMatrix< Real , int > _M = systemMatrix< Real >( UIntPack< FEMSigs ... >() , F , d , interpolationInfo ... );
+#pragma omp parallel for
+		for( int i=0 ; i<_M.rows() ; i++ )
+		{
+			M.setRowSize( i + _sNodesBegin(d) , _M.rowSize(i) );
+			for( int j=0 ; j<_M.rowSize(i) ; j++ ) M[i+_sNodesBegin(d)][j] = MatrixEntry< Real >( _M[i][j].N + _sNodesBegin(d) , _M[i][j].Value );
+		}
+	}
+	for( int d=0 ; d<depth ; d++ )	// Blocks coupling depth d+1 (rows) to depth d (columns)
+	{
+		SparseMatrix< Real , int >& M = prolongedSystemMatrices[d];
+		M.resize( size );
+		SparseMatrix< Real , int > _M = prolongedSystemMatrix< Real >( UIntPack< FEMSigs ... >() , F , d+1 , interpolationInfo ... );
+#pragma omp parallel for
+		for( int i=0 ; i<_M.rows() ; i++ )
+		{
+			M.setRowSize( i + _sNodesBegin(d+1) , _M.rowSize(i) );
+			for( int j=0 ; j<_M.rowSize(i) ; j++ ) M[i+_sNodesBegin(d+1)][j] = MatrixEntry< Real >( _M[i][j].N + _sNodesBegin(d) , _M[i][j].Value );
+		}
+	}
+	for( int d=0 ; d<depth-1 ; d++ )	// Up-sampling from depth d to d+1, then composed with the coarser up-samplings
+	{
+		SparseMatrix< Real , int >& M = upSampleMatrices[d][d+1];
+		M.resize( size );
+		SparseMatrix< Real , int > _M = downSampleMatrix( UIntPack< FEMSigs ... >() , d+1 ).transpose( _sNodesSize( d+1 ) );
+#pragma omp parallel for
+		for( int i=0 ; i<_M.rows() ; i++ )
+		{
+			M.setRowSize( i + _sNodesBegin(d+1) , _M.rowSize(i) );
+			for( int j=0 ; j<_M.rowSize(i) ; j++ ) M[i+_sNodesBegin(d+1)][j] = MatrixEntry< Real >( _M[i][j].N + _sNodesBegin(d) , _M[i][j].Value );
+		}
+		for( int dd=0 ; dd<d ; dd++ ) upSampleMatrices[dd][d+1] = upSampleMatrices[d][d+1] * upSampleMatrices[dd][d];
+	}
+	// Returns the block coupling depths d1 and d2; the block is built for the ordered pair (min,max) and transposed when d2<d1.
+	auto Matrix = [&]( int d1 , int d2 )
+	{
+		SparseMatrix< Real , int > _M;
+		int _d1 = d1<d2 ? d1 : d2 , _d2 = d2<d1 ? d1 : d2;
+		if     ( _d1==_d2   ) _M =          systemMatrices[_d1];
+		else if( _d2==_d1+1 ) _M = prolongedSystemMatrices[_d2-1];
+		else                  _M = prolongedSystemMatrices[_d2-1] * upSampleMatrices[_d1][_d2-1];
+		if( d2<d1 ) return _M.transpose( size );
+		else        return _M;
+	};
+	// Sum the same-depth blocks and append the entries of every cross-depth block row by row.
+	for( int d1=0 ; d1<=depth ; d1++ )
+	{
+		M += Matrix( d1 , d1 );
+		for( int d2=0 ; d2<=depth ; d2++ ) if( d1!=d2 )
+		{
+			SparseMatrix< Real , int > _M = Matrix( d1 , d2 );
+#pragma omp parallel for
+			for( int i=0 ; i<_M.rows() ; i++ ) if( _M.rowSize(i) )
+			{
+				size_t oldSize = M.rowSize(i);
+				M.resetRowSize( i , oldSize + _M.rowSize(i) );
+				for( int j=0 ; j<_M.rowSize(i) ; j++ ) M[i][oldSize+j] = _M[i][j];
+			}
+		}
+	}
+	if( nonRefinableOnly )
+	{
+		_setRefinabilityFlags( UIntPack< FEMSigs ... >() );
+		_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+#pragma omp parallel for
+		for( int i=0 ; i<M.rows() ; i++ )
+			if( ( _isRefinableNode( _sNodes.treeNodes[i] ) && _localDepth( _sNodes.treeNodes[i] )<depth ) || !_isValidFEM1Node( _sNodes.treeNodes[i] ) )
+			{
+				// Setting this to the (local) identity so it doesn't make the system singular.
+				M.resetRowSize( i , 1 );
+				M[i][0] = MatrixEntry< Real >( i , (Real)1. );
+			}
+			else
+			{
+				int jj=0;	// Compact the row in place, keeping only entries whose column node is itself kept (the row node is already known to be valid here, so the validity test is applied to the column node)
+				for( int j=0 ; j<M.rowSize(i) ; j++ ) if( !( _isRefinableNode( _sNodes.treeNodes[ M[i][j].N ] ) && _localDepth( _sNodes.treeNodes[ M[i][j].N ] )<depth ) && _isValidFEM1Node( _sNodes.treeNodes[ M[i][j].N ] ) ) M[i][jj++] = M[i][j];
+				if( jj!=M.rowSize(i) ) M.resetRowSize( i , jj );
+			}
+	}
+	return M;
+}
+template< unsigned int Dim , class Real >
+template< class C , unsigned int ... Degrees , unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_downSample( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< Degrees ... > >& rp , LocalDepth highDepth , Pointer( C ) constraints ) const
+{	// Adds into constraints[i], for each valid node i at depth highDepth-1, the constraints of its valid finer-level neighbors weighted by the up-sampling coefficients of rp. Does nothing if highDepth-1<0.
+	LocalDepth lowDepth = highDepth-1;
+	if( lowDepth<0 ) return;
+
+	typedef typename BaseFEMIntegrator::RestrictionProlongation< UIntPack< Degrees ... > > BaseRestrictionProlongation;
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< ( - BSplineSupportSizes< Degrees >::UpSampleStart ) ... > , UIntPack< BSplineSupportSizes< Degrees >::UpSampleEnd ... > > UpSampleKey;
+	typedef typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineSupportSizes< Degrees >::UpSampleSize ... > > UpSampleNeighbors;
+	typedef UIntPack< BSplineSupportSizes< Degrees >::UpSampleSize ... > UpSampleSizes;
+
+	std::vector< UpSampleKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
+
+	( ( BaseRestrictionProlongation& )rp ).init( highDepth );
+	typename BaseRestrictionProlongation::UpSampleStencil upSampleStencil;
+	rp.setStencil( upSampleStencil );	// Stencil of coefficients used for interior nodes below
+
+#pragma omp parallel for
+	for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		FEMTreeNode* pNode = _sNodes.treeNodes[i];
+		UpSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		LocalDepth d ; LocalOffset off;
+		_localDepthAndOffset( pNode , d , off );
+
+		neighborKey.getNeighbors( pNode );
+		UpSampleNeighbors neighbors;
+		neighborKey.getChildNeighbors( 0 , _localToGlobal( d ) , neighbors );
+
+		C& coarseConstraint = constraints[i];	// Each iteration writes only its own entry; the entries read below are indexed by the finer-level neighbors' node indices
+
+		// We want to test whether the contained children are interior.
+		// This test is more conservative, because it checks that the overlapping children are interior.
+		bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< Degrees ... >() , UIntPack< Degrees ... >() , d , off );
+		if( isInterior )
+		{
+			Pointer( const FEMTreeNode* ) nodes = neighbors.neighbors().data;
+			Pointer( double ) stencilValues = upSampleStencil.data;
+			for( unsigned int i=0 ; i<WindowSize< UpSampleSizes >::Size ; i++ )
+				if( _isValidFEM1Node( nodes[i] ) ) coarseConstraint += (C)( constraints[ nodes[i]->nodeData.nodeIndex ] * (Real)stencilValues[i] );
+		}
+		else
+		{
+			ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+			for( int i=0 ; i<WindowSize< UpSampleSizes >::Size ; i++ ) if( _isValidFEM1Node( nodes[i] ) )
+			{
+				LocalDepth _d ; LocalOffset _off;
+				_localDepthAndOffset( nodes[i] , _d , _off );
+				coarseConstraint += (C)( constraints[ nodes[i]->nodeData.nodeIndex ] * (Real)rp.upSampleCoefficient( off , _off ) );	// Non-interior nodes query the coefficient per (parent,child) offset pair instead of using the stencil
+			}
+		}
+	}
+}
+// Computes, for every valid FEM node at depths 0.._maxDepth, the ratio sum/totalSum, where totalSum adds the integrals of the node against all overlapping neighbor cells and sum adds only those whose cell node passes isValidSpaceNode.
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+DenseNodeData< Real , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::supportWeights( UIntPack< FEMSigs ... > ) const
+{
+	typedef typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > > BaseSystem;
+	typedef typename BaseFEMIntegrator::template Constraint< UIntPack< FEMSignature< FEMSigs >::Degree ... > , IsotropicUIntPack< Dim , 0 > , 1 > BaseConstraint;
+	typedef UIntPack< (  BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , 0 >::OverlapSize  ) ... >          OverlapSizes;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , 0 >::OverlapStart ) ... >  LeftFEMCOverlapRadii;
+	typedef UIntPack< (  BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , 0 >::OverlapEnd   ) ... > RightFEMCOverlapRadii;
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typename FEMIntegrator::template ScalarConstraint< UIntPack< FEMSigs ... > , ZeroUIntPack< Dim > , IsotropicUIntPack< Dim , FEMTrivialSignature > , ZeroUIntPack< Dim > > F( {1.} );	// Integrates the FEM functions against FEMTrivialSignature functions (presumably piecewise-constant cell indicators -- confirm)
+	DenseNodeData< Real , UIntPack< FEMSigs ... > > weights = initDenseNodeData( UIntPack< FEMSigs ... >() );
+	typename BaseConstraint::CCStencil stencil;
+	std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );	// One key per OpenMP thread, selected below via omp_get_thread_num()
+	for( int d=0 ; d<=_maxDepth ; d++ )
+	{
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
+		F.init( d );
+		F.template setStencil< false >( stencil );
+#pragma omp parallel for
+		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+		{
+			ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+
+			FEMTreeNode* node = _sNodes.treeNodes[i];
+			typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+			LocalOffset off;
+			{
+				LocalDepth d ; _localDepthAndOffset( node , d , off );	// Only the offset is kept; this inner d is scoped to the braces
+			}
+			neighborKey.getNeighbors( LeftFEMCOverlapRadii() , RightFEMCOverlapRadii() , node , neighbors );
+			bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , ZeroUIntPack< Dim >() , d , off );	// Here d is the depth loop variable
+			double sum=0 , totalSum=0;
+			if( isInterior )
+			{
+				ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+				ConstPointer( Point< double , 1 > ) stencilValues = stencil.data;
+				for( int i=0 ; i<WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , 0 >::OverlapSize ... > >::Size ; i++ )
+				{
+					double s = stencilValues[i][0];
+					totalSum += s;
+					if( isValidSpaceNode( nodes[i] ) ) sum += s;
+				}
+			}
+			else
+			{
+				static const int OverlapStart [] = { BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree , 0 >::OverlapStart ... };
+				LocalOffset _off;
+				WindowLoop< Dim >::Run
+				(
+					IsotropicUIntPack< Dim , 0 >() , OverlapSizes() ,
+					[&]( int d , int i ){ _off[d] = off[d]+i+OverlapStart[d]; } ,
+					[&]( const FEMTreeNode* node )
+					{
+						double s = F.ccIntegrate( off , _off )[0];	// Non-interior nodes integrate explicitly per offset pair instead of using the stencil
+						totalSum += s;
+						if( isValidSpaceNode( node ) ) sum += s;
+					} ,
+					neighbors.neighbors()
+				);
+			}
+			weights[i] = (Real)( sum / totalSum );	// NOTE(review): totalSum is not checked against zero before dividing
+		}
+	}
+	return weights;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+DenseNodeData< Real , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::prolongationWeights( UIntPack< FEMSigs ... > , bool prolongToChildren ) const
+{
+	DenseNodeData< Real , UIntPack< FEMSigs ... > > weights = initDenseNodeData( UIntPack< FEMSigs ... >() );
+
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typedef typename BaseFEMIntegrator::RestrictionProlongation< UIntPack< FEMSignature< FEMSigs >::Degree ... > > BaseRestrictionProlongation;
+	typedef typename     FEMIntegrator::template RestrictionProlongation< UIntPack< FEMSigs ... > > RestrictionProlongation;
+
+	typename BaseRestrictionProlongation::DownSampleStencils downSampleStencils;
+	RestrictionProlongation rp;
+
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart ) ... > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleEnd ... > > UpSampleKey;
+	typedef typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... > > UpSampleNeighbors;
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start ... > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End ... > > DownSampleKey;
+	typedef typename FEMTreeNode::template ConstNeighbors< UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start + BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End + 1 ) ... > > DownSampleNeighbors;
+	const int      UpSampleStart[] =   { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart    ... };
+	const int DownSampleStart[2][Dim] = { { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start ... } , { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1Start ... } };
+	const int   DownSampleEnd[2][Dim] = { { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0End   ... } , { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End   ... } };
+
+	std::vector< UpSampleKey > neighborKeys( omp_get_max_threads() );
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( _maxDepth-1 ) );
+
+#pragma omp parallel for
+	for( int i=_sNodesBegin(_maxDepth) ; i<_sNodesEnd(_maxDepth) ; i++ ) weights[i] = (Real)0.;
+
+	for( int lowDepth=0 ; lowDepth<_maxDepth ; lowDepth++ )
+	{
+		( ( BaseRestrictionProlongation& )rp ).init( lowDepth+1 );
+		typename BaseRestrictionProlongation::UpSampleStencil upSampleStencil;
+		rp.setStencil( upSampleStencil );
+
+#pragma omp parallel for
+		for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+		{
+			FEMTreeNode* pNode = _sNodes.treeNodes[i];
+			UpSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			LocalDepth d ; LocalOffset pOff;
+			_localDepthAndOffset( pNode , d , pOff );
+
+			neighborKey.getNeighbors( pNode );
+			UpSampleNeighbors neighbors;
+			neighborKey.getChildNeighbors( 0 , _localToGlobal( d ) , neighbors );
+
+			double partialSum = 0 , totalSum = 0;
+
+			// Want to make sure test if contained children are interior.
+			// This is more conservative because we are test that overlapping children are interior
+			bool isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , UIntPack< FEMSignature< FEMSigs >::Degree ... >() , d , pOff );
+
+			LocalOffset cOff;
+			if( isInterior )
+			{
+				WindowLoop< Dim >::Run
+				(
+					IsotropicUIntPack< Dim , 0 >() , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... >() ,
+					[&]( int d , int i ){ cOff[d] = UpSampleStart[d] + pOff[d]*2 + i; } ,
+					[&]( const FEMTreeNode* node , double stencilValue )
+					{
+						if( FEMIntegrator::IsValidFEMNode( UIntPack< FEMSigs ... >() , lowDepth+1 , cOff ) )
+						{
+							totalSum += stencilValue;
+							if( _isValidFEM1Node( node ) ) partialSum += stencilValue;
+						}
+					} ,
+					neighbors.neighbors() , upSampleStencil()
+				);
+			}
+			else
+			{
+				WindowLoop< Dim >::Run
+				(
+					IsotropicUIntPack< Dim , 0 >() , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... >() ,
+					[&]( int d , int i ){ cOff[d] = UpSampleStart[d] + pOff[d]*2 + i; } ,
+					[&]( const FEMTreeNode* node )
+					{
+						if( FEMIntegrator::IsValidFEMNode( UIntPack< FEMSigs ... >() , lowDepth+1 , cOff ) )
+						{
+							double stencilValue = rp.upSampleCoefficient( pOff , cOff );
+							totalSum += stencilValue;
+							if( _isValidFEM1Node( node ) ) partialSum += stencilValue;
+						}
+					} ,
+					neighbors.neighbors() 
+				);
+			}
+			weights[i] = (Real)( partialSum / totalSum );
+		}
+	}
+	if( prolongToChildren )
+	{
+		std::vector< DownSampleKey > neighborKeys( omp_get_max_threads() );
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( _maxDepth-1 ) );
+
+		for( int lowDepth=_maxDepth-1 ; lowDepth>=0 ; lowDepth-- )
+		{
+			( ( BaseRestrictionProlongation& )rp ).init( lowDepth+1 );
+			typename BaseRestrictionProlongation::DownSampleStencils downSampleStencils;
+			rp.setStencils( downSampleStencils );
+
+#pragma omp parallel for
+			for( int i=_sNodesBegin(lowDepth+1) ; i<_sNodesEnd(lowDepth+1) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+			{
+				FEMTreeNode *cNode = _sNodes.treeNodes[i];
+				int c = (int)( cNode-cNode->parent->children );
+
+				DownSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+				LocalDepth d ; LocalOffset cOff;
+				_localDepthAndOffset( cNode , d , cOff );
+				DownSampleNeighbors neighbors = neighborKey.getNeighbors( cNode->parent );
+				// Want to make sure test if contained children are interior.
+				// This is more conservative because we are test that overlapping children are interior
+				bool isInterior;
+				{
+					LocalDepth d ; LocalOffset pOff;
+					_localDepthAndOffset( cNode->parent , d , pOff );
+					isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMSignature< FEMSigs >::Degree ... >() , UIntPack< FEMSignature< FEMSigs >::Degree ... >() , d , pOff );
+				}
+
+				typename BaseRestrictionProlongation::DownSampleStencil& downSampleStencil = downSampleStencils.data[c];
+				int start[Dim] , end[Dim];
+				for( int d=0 ; d<Dim ; d++ ) start[d] = DownSampleStart[(c>>d)&1][d] - DownSampleStart[0][d] , end[d] = - DownSampleStart[0][d] + DownSampleEnd[(c>>d)&1][d] + 1;
+
+				double partialSum = 0 , totalSum = 0;
+				if( isInterior )
+				{
+					WindowLoop< Dim >::Run
+					(
+						start , end ,
+						[&]( int , int ){ } ,
+						[&]( const FEMTreeNode* node , double stencilValue ){ if( _isValidFEM1Node( node ) ) totalSum += stencilValue , partialSum += weights[ node->nodeData.nodeIndex ] * stencilValue; } ,
+						neighbors.neighbors() , downSampleStencil()
+					);
+				}
+				else
+				{
+					WindowLoop< Dim >::Run
+					(
+						start , end ,
+						[&]( int , int ){ } ,
+						[&]( const FEMTreeNode* node )
+						{
+							if( _isValidFEM1Node( node ) )
+							{
+								LocalDepth d ; LocalOffset pOff;
+								_localDepthAndOffset( node , d , pOff );
+								double stencilValue = rp.upSampleCoefficient( pOff , cOff );
+								totalSum += stencilValue , partialSum += weights[ node->nodeData.nodeIndex ] * stencilValue;
+							}
+						} ,
+						neighbors.neighbors()
+					);
+				}
+				weights[i] = (Real)( partialSum / totalSum );
+			}
+		}
+	}
+	return weights;
+}
+
+template< unsigned int Dim , class Real >
+template< class C , unsigned int ... Degrees , unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_upSample( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< Degrees ... > >& rp , LocalDepth highDepth , Pointer( C ) coefficients ) const
+{
+	// Adds to the coefficient of every valid node at depth highDepth the prolongation of the valid coefficients at depth highDepth-1.
+	// The coefficients array is read at the coarse node indices and accumulated into (not overwritten) at the fine node indices.
+	LocalDepth lowDepth = highDepth-1;
+	if( lowDepth<0 ) return;
+	typedef typename BaseFEMIntegrator::RestrictionProlongation< UIntPack< Degrees ... > > BaseRestrictionProlongation;
+	typedef UIntPack< ( - BSplineSupportSizes< Degrees >::DownSample0Start + BSplineSupportSizes< Degrees >::DownSample1End + 1 ) ... > DownSampleSizes;
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< - BSplineSupportSizes< Degrees >::DownSample0Start ... > , UIntPack< BSplineSupportSizes< Degrees >::DownSample1End ... > > DownSampleKey;
+	typedef typename FEMTreeNode::template ConstNeighbors< DownSampleSizes > DownSampleNeighbors;
+
+	// One neighbor key per OpenMP thread, so that iterations of the parallel loop below never share a key
+	std::vector< DownSampleKey > neighborKeys( omp_get_max_threads() );
+	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
+
+	( ( BaseRestrictionProlongation& )rp ).init( highDepth );
+	typename BaseRestrictionProlongation::DownSampleStencils downSampleStencils;
+	rp.setStencils( downSampleStencils );
+
+	// For each child corner c, the window entries to visit; the same index addresses both the neighbor array and the stencil values
+	static const WindowLoopData< DownSampleSizes > loopData
+	( []( int c , int* start , int* end )
+	{
+		const int Start[2][Dim] = { { BSplineSupportSizes< Degrees >::DownSample0Start ... } , { BSplineSupportSizes< Degrees >::DownSample1Start ... } };
+		const int   End[2][Dim] = { { BSplineSupportSizes< Degrees >::DownSample0End   ... } , { BSplineSupportSizes< Degrees >::DownSample1End   ... } };
+		for( int d=0 ; d<Dim ; d++ ) start[d] = Start[(c>>d)&1][d] - Start[0][d] , end[d] = - Start[0][d] + End[(c>>d)&1][d] + 1;
+	} 
+	);
+	// For Dirichlet constraints, can't get to all children from parents because boundary nodes are invalid
+#pragma omp parallel for
+	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+	{
+		FEMTreeNode *cNode = _sNodes.treeNodes[i];
+		int c = (int)( cNode-cNode->parent->children );	// index of this node among its parent's children
+
+		DownSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+		DownSampleNeighbors neighbors = neighborKey.getNeighbors( cNode->parent );
+		// The interior test is performed on the parent rather than on the child.
+		// This is more conservative because it tests that the overlapping children are interior
+		bool isInterior;
+		{
+			LocalDepth pDepth ; LocalOffset pOff;
+			_localDepthAndOffset( cNode->parent , pDepth , pOff );
+			isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< Degrees ... >() , UIntPack< Degrees ... >() , pDepth , pOff );
+		}
+
+		C& fineCoefficient = coefficients[ cNode->nodeData.nodeIndex ];
+		typename BaseRestrictionProlongation::DownSampleStencil& downSampleStencil = downSampleStencils.data[c];
+		unsigned int size = loopData.size[c];
+		const unsigned int* indices = loopData.indices[c];
+		Pointer( const FEMTreeNode* ) nodes = neighbors.neighbors().data;
+		Pointer( double ) downSampleValues = downSampleStencil.data;
+		if( isInterior )
+		{
+			// Interior case: the precomputed stencil values apply
+			for( unsigned int n=0 ; n<size ; n++ )
+			{
+				unsigned int idx = indices[n];
+				if( _isValidFEM1Node( nodes[idx] ) ) fineCoefficient += (C)( coefficients[ nodes[idx]->nodeData.nodeIndex ] * (Real)downSampleValues[idx] );
+			}
+		}
+		else
+		{
+			// Otherwise each weight is queried from rp using the offsets of the coarse neighbor and of the fine node
+			LocalDepth cDepth ; LocalOffset cOff;
+			_localDepthAndOffset( cNode , cDepth , cOff );
+			for( unsigned int n=0 ; n<size ; n++ )
+			{
+				unsigned int idx = indices[n];
+				if( !_isValidFEM1Node( nodes[idx] ) ) continue;
+				LocalDepth nDepth ; LocalOffset nOff;
+				_localDepthAndOffset( nodes[idx] , nDepth , nOff );
+				fineCoefficient += (C)( coefficients[ nodes[idx]->nodeData.nodeIndex ] * (Real)rp.upSampleCoefficient( nOff , cOff ) );
+			}
+		}
+	}
+}
+
+template< unsigned int Dim , class Real >
+template< bool XMajor , class C , unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_RegularGridUpSample( UIntPack< FEMSigs ... > , LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients )
+{
+	// Overload that takes the grid extents at both depths from FEMIntegrator::BSplineBegin/BSplineEnd and forwards to the explicit-range version.
+	typedef UIntPack< FEMSigs ... > Signatures;
+	LocalDepth coarseDepth = highDepth - 1;
+	if( coarseDepth<0 ) return;
+	int coarseBegin[Dim] , coarseEnd[Dim] , fineBegin[Dim] , fineEnd[Dim];
+	FEMIntegrator::BSplineBegin( Signatures() , coarseDepth , coarseBegin );
+	FEMIntegrator::BSplineEnd  ( Signatures() , coarseDepth , coarseEnd   );
+	FEMIntegrator::BSplineBegin( Signatures() ,   highDepth ,   fineBegin );
+	FEMIntegrator::BSplineEnd  ( Signatures() ,   highDepth ,   fineEnd   );
+	_RegularGridUpSample< XMajor >( Signatures() , coarseBegin , coarseEnd , fineBegin , fineEnd , highDepth , lowCoefficients , highCoefficients );
+}
+template< unsigned int Dim , class Real >
+template< bool XMajor , class C , unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_RegularGridUpSample( UIntPack< FEMSigs ... > , const int lowBegin[] , const int lowEnd[] , const int highBegin[] , const int highEnd[] , LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients )
+{
+	// Accumulates into highCoefficients (a flattened grid over [highBegin,highEnd)) the up-sampling of lowCoefficients (a flattened grid over [lowBegin,lowEnd)).
+	// Note: In contrast to the standard grid indexing, where x is the major index in (x,y,z,...), for our representation of the grid, x is the minor index
+	LocalDepth lowDepth = highDepth - 1;
+	if( lowDepth<0 ) return;
+	// Per-dimension down-sample window tables; the first index of DownSampleStart/DownSampleSize is selected below by the parity bit (c>>d)&1
+	static const          int LeftDownSampleRadii[] = { -( ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start < BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1Start ) ? BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start : BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1Start ) ... };
+	static const          int DownSampleStart[][ sizeof...(FEMSigs) ] = { { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start ... } , { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1Start ... } };
+	static const unsigned int DownSampleSize [][ sizeof...(FEMSigs) ] = { { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Size  ... } , { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1Size  ... } };
+	typedef UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start + BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End + 1 ) ... > DownSampleSizes;
+	typedef typename     FEMIntegrator::template RestrictionProlongation< UIntPack< FEMSigs ...                         > >     RestrictionProlongation;
+	typedef typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< FEMSigs >::Degree ... > > BaseRestrictionProlongation;
+	// The stencils for depth highDepth are set once here and read by the interior fast path below
+	RestrictionProlongation rp;
+	typename BaseRestrictionProlongation::DownSampleStencils downSampleStencils;
+	rp.init( highDepth );
+	rp.setStencils( downSampleStencils );
+	// Per child corner c: the window entries to visit, stored both as stencil indices and as element offsets into the low-resolution array
+	struct LoopData
+	{
+		unsigned int size[1<<Dim];
+		unsigned int indices[1<<Dim][ WindowSize< UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start + BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End + 1 ) ... > >::Size ];
+		long long offsets[1<<Dim][ WindowSize< UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample0Start + BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::DownSample1End + 1 ) ... > >::Size ];
+		LoopData( const int lowBegin[] , const int lowEnd[] , const int highBegin[] , const int highEnd[] )
+		{
+			int start[Dim] , end[Dim] , lowDim[Dim] , highDim[Dim];
+			for( int d=0 ; d<Dim ; d++ ) lowDim[d] = lowEnd[d] - lowBegin[d] , highDim[d] = highEnd[d] - highBegin[d];	// NOTE(review): lowDim/highDim are not read again in this constructor
+			// Strides of the flattened grids: dimension 0 has stride 1 when XMajor is set, dimension Dim-1 otherwise
+			int lowDimMultiplier[Dim] , highDimMultiplier[Dim];
+			if( XMajor )
+			{
+				lowDimMultiplier[0] = highDimMultiplier[0] = 1;
+				for( int d=1 ; d<Dim ; d++ ) lowDimMultiplier[d] = lowDimMultiplier[d-1] * (lowEnd[d-1]-lowBegin[d-1]) , highDimMultiplier[d] = highDimMultiplier[d-1] * (highEnd[d-1]-highBegin[d-1]);
+			}
+			else
+			{
+				lowDimMultiplier[Dim-1] = highDimMultiplier[Dim-1] = 1;
+				for( int d=Dim-2 ; d>=0 ; d-- ) lowDimMultiplier[d] = lowDimMultiplier[d+1] * (lowEnd[d+1]-lowBegin[d+1]) , highDimMultiplier[d] = highDimMultiplier[d+1] * (highEnd[d+1]-highBegin[d+1]);
+			}
+
+			for( int c=0 ; c<(1<<Dim) ; c++ )
+			{
+				size[c] = 0;
+				for( int d=0 ; d<Dim ; d++ ) start[d] = DownSampleStart[(c>>d)&1][d] + LeftDownSampleRadii[d] , end[d] = start[d] + DownSampleSize[(c>>d)&1][d];
+
+				unsigned int idx[Dim];
+				long long off[Dim+1];	// off[d+1] is the offset accumulated over dimensions 0..d
+				off[0] = 0;
+				WindowLoop< Dim >::Run
+				(
+					start , end ,
+					[&]( int d , int i ){ idx[d] = i ; off[d+1] = off[d] + ( i - LeftDownSampleRadii[d] - lowBegin[d] ) * lowDimMultiplier[d]; } ,
+					[&]( void ){ indices[c][ size[c] ] = GetWindowIndex( DownSampleSizes() , idx ) , offsets[c][ size[c] ] = off[Dim] ; size[c]++; }
+				);
+			}
+		}
+	};
+	const LoopData loopData( lowBegin , lowEnd , highBegin , highEnd );
+	int lowDim[Dim] , highDim[Dim];
+	for( int d=0 ; d<Dim ; d++ ) lowDim[d] = lowEnd[d] - lowBegin[d] , highDim[d] = highEnd[d] - highBegin[d];	// highDim bounds the traversal below; NOTE(review): lowDim is not read again
+	int Zero[Dim];
+	for( int d=0 ; d<Dim ; d++ ) Zero[d] = 0;
+	int lowDimMultiplier[Dim] , highDimMultiplier[Dim];	// same stride convention as in LoopData; only lowDimMultiplier is read below
+	if( XMajor )
+	{
+		lowDimMultiplier[0] = highDimMultiplier[0] = 1;
+		for( int d=1 ; d<Dim ; d++ ) lowDimMultiplier[d] = lowDimMultiplier[d-1] * (lowEnd[d-1]-lowBegin[d-1]) , highDimMultiplier[d] = highDimMultiplier[d-1] * (highEnd[d-1]-highBegin[d-1]);
+	}
+	else
+	{
+		lowDimMultiplier[Dim-1] = highDimMultiplier[Dim-1] = 1;
+		for( int d=Dim-2 ; d>=0 ; d-- ) lowDimMultiplier[d] = lowDimMultiplier[d+1] * (lowEnd[d+1]-lowBegin[d+1]) , highDimMultiplier[d] = highDimMultiplier[d+1] * (highEnd[d+1]-highBegin[d+1]);
+	}
+	// Traversal state that set( d , i ) updates one dimension at a time; entry [d+1] of each array extends entry [d] by dimension d.
+	// One instance is allocated per OpenMP thread below (presumably indexed by the thread running the callback -- confirm against WindowLoop::RunParallel)
+	struct UpdateData
+	{
+		typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > Degrees;
+		LocalOffset pOff , cOff;
+		int c;
+		long long lowIndex[Dim+1] , highIndex[Dim+1];
+		bool isInterior[Dim+1];
+		int start[Dim] , end[Dim];
+		void init( int lowDepth , const int lowBegin[] , const int lowEnd[] , const int highBegin[] , const int highEnd[] )
+		{
+			c = 0;
+			lowIndex[0] = highIndex[0] = 0;
+			isInterior[0] = true;
+			this->lowBegin = lowBegin , this->lowEnd = lowEnd , this->highBegin = highBegin , this->highEnd = highEnd;
+			if( XMajor )
+			{
+				_lowDim[0] = _highDim[0] = 1;
+				for( int d=1 ; d<Dim ; d++ ) _lowDim[d] = _lowDim[d-1] * (lowEnd[d-1]-lowBegin[d-1]) , _highDim[d] = _highDim[d-1] * (highEnd[d-1]-highBegin[d-1]);
+			}
+			else
+			{
+				_lowDim[Dim-1] = _highDim[Dim-1] = 1;
+				for( int d=Dim-2 ; d>=0 ; d-- ) _lowDim[d] = _lowDim[d+1] * (lowEnd[d+1]-lowBegin[d+1]) , _highDim[d] = _highDim[d+1] * (highEnd[d+1]-highBegin[d+1]);
+			}
+			BaseFEMIntegrator::InteriorOverlappedSpan( Degrees() , Degrees() , lowDepth , _begin , _end );
+		}
+		void set( int d , int i )
+		{
+			int ii = i + highBegin[d];	// i is relative to highBegin; ii is the fine offset in dimension d
+			cOff[d] = ii;
+			pOff[d] = (ii>>1);	// offset of the parent in dimension d
+			c = ( c & ( ~(1<<d) ) ) | (ii&1)<<d;	// bit d of c is the parity of the fine offset in dimension d
+			lowIndex[d+1] = lowIndex[d] + pOff[d] * _lowDim[d];
+			highIndex[d+1] = highIndex[d] + i * _highDim[d];
+			start[d] = DownSampleStart[(c>>d)&1][d] + LeftDownSampleRadii[d] , end[d] = start[d] + DownSampleSize[(c>>d)&1][d];
+			isInterior[d+1] = isInterior[d] && ( pOff[d] + start[d] - LeftDownSampleRadii[d] )>=lowBegin[d] && ( pOff[d] + end[d] - LeftDownSampleRadii[d] )<lowEnd[d] && pOff[d]>=_begin[d] && pOff[d]<_end[d];
+		}
+	protected:
+		const int *lowBegin , *lowEnd , *highBegin , *highEnd;
+		int _lowDim[Dim] , _highDim[Dim] , _begin[Dim] , _end[Dim];
+	};
+	std::vector< UpdateData > updateData( omp_get_max_threads() );
+	for( int i=0 ; i<updateData.size() ; i++ ) updateData[i].init( lowDepth , lowBegin , lowEnd , highBegin , highEnd );
+	WindowLoop< Dim >::RunParallel
+	(
+		Zero , highDim ,
+		[&]( int t , int d , int i ){ updateData[t].set( d , i ); } ,
+		[&]( int t )
+		{
+			const UpdateData& data = updateData[t];
+			const long long highIdx = data.highIndex[Dim] , lowIndex = data.lowIndex[Dim];
+			const int c = data.c;
+			const bool isInterior = data.isInterior[Dim];
+
+			C highCoefficient = {};	// value-initialized accumulator for this fine cell
+
+			if( isInterior )
+			{
+				// Fast path: use the precomputed stencil values and the precomputed offsets relative to lowIndex
+				typename BaseRestrictionProlongation::DownSampleStencil& downSampleStencil = downSampleStencils.data[c];
+				const unsigned int size = loopData.size[c];
+				const unsigned int* idx = loopData.indices[c];
+				const long long* off = loopData.offsets[c];
+				ConstPointer( double ) stencilValues = downSampleStencil.data;
+				ConstPointer( C ) _lowCoefficients = lowCoefficients + lowIndex;
+				for( unsigned int i=0 ; i<size ; i++ ) highCoefficient += (C)( _lowCoefficients[ off[i] ] * (Real)stencilValues[ idx[i] ] );
+			}
+			else
+			{
+				// Slow path: walk the window explicitly, skip entries outside [lowBegin,lowEnd), and query each weight from rp
+				const LocalOffset& pOff = data.pOff;
+				const LocalOffset& cOff = data.cOff;
+				const int* start = data.start;
+				const int* end = data.end;
+				long long lowIdx[ Dim+1 ] ; lowIdx[0] = 0;
+				bool isValid[Dim+1] ; isValid[0] = true;
+				int _pOff[Dim];
+
+				WindowLoop< Dim >::Run
+				(
+					start , end ,
+					[&]( int d , int i )
+					{
+						_pOff[d] = pOff[d] + i - LeftDownSampleRadii[d];
+						lowIdx[d+1] = lowIdx[d] + lowDimMultiplier[d] * ( _pOff[d] - lowBegin[d] );
+						isValid[d+1] = isValid[d] && ( _pOff[d]>=lowBegin[d] && _pOff[d]<lowEnd[d] );
+					} ,
+					[&]( void ){ if( isValid[Dim] ) highCoefficient += (C)( lowCoefficients[ lowIdx[Dim] ] * (Real)rp.upSampleCoefficient( _pOff , cOff ) ); }
+				);
+			}
+			highCoefficients[ highIdx ] += highCoefficient;	// added to, not assigned over, the existing value
+		}
+	);
+}
+
+
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+DenseNodeData< T , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , TDotT Dot , LocalDepth maxSolveDepth , const typename FEMTree< Dim , Real >::SolverInfo& solverInfo , InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{
+	// Value-returning form: hands a default-constructed solution to the overload that takes it by reference, then returns what that overload produced.
+	DenseNodeData< T , UIntPack< FEMSigs ... > > result ; solveSystem( UIntPack< FEMSigs ... >() , F , constraints , result , Dot , maxSolveDepth , solverInfo , interpolationInfo... );
+	return result;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+void FEMTree< Dim , Real >::solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , DenseNodeData< T , UIntPack< FEMSigs ... > >& solution , TDotT Dot , LocalDepth maxSolveDepth , const typename FEMTree< Dim , Real >::SolverInfo& solverInfo , InterpolationInfo< T , PointDs >* ... interpolationInfo ) const
+{	// Multi-resolution solve of the system F against constraints, over depths [solverInfo.baseDepth,maxSolveDepth]; solution is both the (optional) initial guess and the output.
+	int baseDepth = solverInfo.baseDepth;
+	if( baseDepth>getFullDepth( UIntPack< FEMSignature< FEMSigs >::Degree ... >() ) ) ERROR_OUT( "Base depth cannot excceed full depth: %d <= %d" , baseDepth , getFullDepth( UIntPack< FEMSignature< FEMSigs >::Degree ... >() ) );
+
+	static_assert( Dim==sizeof ... ( FEMSigs ) , "[ERROR] FEMTree:solveSystem: Dimensions and number of signatures don't match" );
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > > bsData( sizeof...(PointDs)==0 ? 0 : maxSolveDepth );
+
+	maxSolveDepth = std::min< LocalDepth >( maxSolveDepth , _maxDepth );
+	// A solution that is not sized for the whole tree is discarded and re-created; otherwise it is used as the initial guess
+	bool clearSolution = solution.size()!=_sNodesEnd( _maxDepth );
+	if( clearSolution ) solution = initDenseNodeData< T >( UIntPack< FEMSigs ... >() );
+	const bool simpleSolve = clearSolution && solverInfo.vCycles==1 && solverInfo.cascadic;	// in this case the restricted constraints are never allocated
+
+	// The initial estimate of the solution (may be empty or may come in with an initial guess)
+	Pointer( T ) _solution = solution();
+	// The constraints
+	ConstPointer( T ) _constraints = constraints();
+
+	// _residualConstraints:
+	// -- stores the difference between the initial constraints and the constraints met by the current solution at all _other_ levels
+	// **** This could be implemented in one of two ways:
+	// **** (1) Repeatedly computing the difference using the entire solution
+	// **** (2) Iteratively updating using the change in the solution
+	// **** We have opted for #1 to avoid having to compute/store the change in the solution after each solve
+	Pointer( T ) _residualConstraints = AllocPointer< T >( _sNodesEnd( _maxDepth-1 ) );
+	// The constraints met during the restriction phase
+	Pointer( T ) _restrictedConstraints = NullPointer( T );
+	// The solution obtained during the prolongation phase
+	Pointer( T ) _prolongedSolution = AllocPointer< T >( _sNodesEnd( _maxDepth-1 ) );
+
+	memset( _prolongedSolution , 0 , sizeof(T) * _sNodesEnd( _maxDepth-1 ) );
+	if( !simpleSolve )
+	{
+		_restrictedConstraints = AllocPointer< T >( _sNodesEnd( _maxDepth-1 ) );
+		memset( _restrictedConstraints , 0 , sizeof(T) * _sNodesEnd( _maxDepth-1 ) );
+	}
+
+	Pointer( double ) _bNorm2 = NullPointer( double );
+	if( solverInfo.showGlobalResidual!=SHOW_GLOBAL_RESIDUAL_NONE )
+	{
+		_bNorm2 = AllocPointer< double >( _maxDepth+1 );
+		memset( _bNorm2 , 0 , sizeof(double) * ( _maxDepth+1 ) );
+		for( LocalDepth d=baseDepth ; d<=maxSolveDepth ; d++ ) for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) _bNorm2[d] += Dot( _constraints[i] , _constraints[i] );
+	}
+	// In-place helpers that move information between depths using the buffers allocated above
+	auto UpdateProlongation = [&] ( int depth )
+	{
+		if( depth<_maxDepth && _prolongedSolution )
+		{
+			memset( _prolongedSolution + _sNodesBegin( depth ) , 0 , sizeof( T ) * _sNodesSize( depth ) );
+			// Up-sample the prolonged solution @(depth-1) into the prolonged solution @(depth)
+			F.init( depth );
+			if( depth>baseDepth ) _upSample( UIntPack< FEMSigs ... >() , F.restrictionProlongation() , depth , _prolongedSolution );
+			// Add in the solution @(depth) to the prolonged solution
+#pragma omp parallel for
+			for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ ) _prolongedSolution[i] += solution[i];
+		}
+	};
+	auto UpdateRestriction = [&]( int depth , InterpolationInfo< T , PointDs >* ... interpolationInfo )
+	{
+		if( depth>baseDepth && _restrictedConstraints )
+		{
+			memset( _restrictedConstraints + _sNodesBegin( depth-1 ) , 0 , sizeof( T ) * _sNodesSize( depth-1 ) );
+			// Update the restricted constraints @(depth-1) based on the solution @(depth)
+			F.init( depth );
+			_updateRestrictedIntegralConstraints( UIntPack< FEMSigs ... >() , F , depth , ( ConstPointer(T) )_solution , _restrictedConstraints );
+			_updateRestrictedInterpolationConstraints( bsData , depth , ( ConstPointer( T ) )_solution , _restrictedConstraints , interpolationInfo... );
+			// Down-sample the restricted constraints @(depth) into the restricted constraints @(depth-1)
+			if( depth<_maxDepth ) _downSample( UIntPack< FEMSigs ... >() , F.restrictionProlongation() , depth , _restrictedConstraints );
+		}
+	};
+	auto SetResidualConstraints = [&]( int depth , InterpolationInfo< T , PointDs >* ... interpolationInfo )
+	{
+		// Copy the constraints
+		if( depth<_maxDepth ) memcpy( _residualConstraints + _sNodesBegin(depth) , _constraints + _sNodesBegin(depth) , sizeof( T ) * _sNodesSize(depth) );
+
+		// Update the constraints @(depth) using the prolonged solution @(depth-1)
+		if( depth>baseDepth && _prolongedSolution ) _setPointValuesFromProlongedSolution( depth , bsData , ( ConstPointer( T ) )_prolongedSolution , interpolationInfo... );
+		// Update the constraints @(depth) using the restricted residual @(depth)
+		if( depth<_maxDepth && _restrictedConstraints )
+#pragma omp parallel for
+			for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ ) _residualConstraints[i] -= _restrictedConstraints[i];
+	};
+	auto OutputSolverStats = [&] ( int cycle , int depth , const _SolverStats& sStats , bool showResidual , int actualIters )
+	{
+		if( solverInfo.verbose )
+		{
+			int femNodes = (int)validFEMNodes( UIntPack< FEMSigs ... >() , depth );
+			if( maxSolveDepth<10 )
+				if( solverInfo.vCycles<10 ) printf( "Cycle[%d] Depth[%d/%d]:\t" , cycle , depth , maxSolveDepth );
+				else                        printf( "Cycle[%2d] Depth[%d/%d]:\t" , cycle , depth , maxSolveDepth );
+			else 
+				if( solverInfo.vCycles<10 ) printf( "Cycle[%d] Depth[%2d/%d]:\t" , cycle , depth , maxSolveDepth );
+				else                        printf( "Cycle[%2d] Depth[%2d/%d]:\t" , cycle , depth , maxSolveDepth );
+			printf( "Updated constraints / Got system / Solved in: %6.3f / %6.3f / %6.3f\t(%.3f MB)\tNodes: %d\n" , sStats.constraintUpdateTime , sStats.systemTime , sStats.solveTime , _LocalMemoryUsage , femNodes );
+		}
+		if( solverInfo.showResidual && showResidual )
+		{
+			for( int d=baseDepth ; d<depth ; d++ ) printf( "  " );
+			printf( "%s: %.4e -> %.4e -> %.4e (%.1e) [%d]\n" , depth<=solverInfo.cgDepth ? "CG" : "GS" , sqrt( sStats.bNorm2 ) , sqrt( sStats.inRNorm2 ) , sqrt( sStats.outRNorm2 ) , sqrt( sStats.outRNorm2  / sStats.inRNorm2 ) , actualIters );
+		}
+	};
+
+	// Set the cumulative solution
+	if( !clearSolution ) for( LocalDepth d=baseDepth ; d<maxSolveDepth ; d++ ) UpdateProlongation( d );
+
+	_SolverStats sStats;
+	bool showResidual;
+	int actualIters = 0;	// the base-depth solve does not assign an iteration count, so this must not start out indeterminate
+	double t;
+
+	struct TrivialSORWeights{ Real operator[] ( int idx ) const { return (Real)1; } };
+	struct SORWeights
+	{
+		DenseNodeData< Real,  UIntPack< FEMSigs ... > > supportWeights , prolongationSupportWeights;
+		std::function< Real (Real,Real) > sorFunction;
+		Real operator[] ( int idx ) const
+		{
+			if     ( supportWeights() && prolongationSupportWeights() ) return sorFunction( supportWeights[idx] , prolongationSupportWeights[idx] );
+			else if( supportWeights()                                 ) return sorFunction( supportWeights[idx] , 1                               );
+			else if(                     prolongationSupportWeights() ) return sorFunction( 1                   , prolongationSupportWeights[idx] );
+			else                                                        return sorFunction( 1                   , 1                               );
+		}
+	};
+	SORWeights sorWeights;
+	if( solverInfo.useSupportWeights ) sorWeights.supportWeights = supportWeights( UIntPack< FEMSigs ... >() );
+	if( solverInfo.useProlongationSupportWeights ) sorWeights.prolongationSupportWeights = prolongationWeights( UIntPack< FEMSigs ... >() , false );
+	// Fine-to-coarse sweep over depths [baseDepth,depth]
+	auto SolveRestriction = [&]( int v , int depth , InterpolationInfo< T , PointDs >* ... interpolationInfo )
+	{
+		sorWeights.sorFunction = solverInfo.sorRestrictionFunction;
+		// The restriction phase
+		if( solverInfo.cascadic )
+		{
+			showResidual = false;
+			if( !clearSolution || v>0 ) for( LocalDepth d=depth ; d>=baseDepth ; d-- ) { F.init( d ) ; UpdateRestriction( d , interpolationInfo ... ); }
+		}
+		else
+		{
+			bool coarseToFine = false;
+			for( LocalDepth d=depth ; d>=baseDepth ; d-- )
+			{
+				sStats.constraintUpdateTime = 0;
+				showResidual = ( d!=baseDepth );
+				int iters = solverInfo.iters( v , true , d );
+				t = Time();
+				F.init( d );
+				SetResidualConstraints( d , interpolationInfo... );
+				sStats.constraintUpdateTime += Time()-t;
+				// In the restriction phase we do not solve at the coarsest resolution since we will do so in the prolongation phase
+				if( d==baseDepth ) _solveRegularMG( UIntPack< FEMSigs ... >() , F , bsData , d , _solution , d==_maxDepth ? _constraints : _residualConstraints , Dot , solverInfo.baseVCycles , iters , sStats , solverInfo.showResidual , solverInfo.cgAccuracy , interpolationInfo... );
+				else
+				{
+					if( d>solverInfo.cgDepth ) actualIters = _solveSystemGS( UIntPack< FEMSigs ... >() , Dim!=1 , F , bsData , d , _solution , ( ConstPointer( T ) )_prolongedSolution , d==_maxDepth ? _constraints : _residualConstraints , Dot , iters , coarseToFine , solverInfo.sliceBlockSize , sorWeights , sStats , solverInfo.showResidual ,                         interpolationInfo... );
+					else                       actualIters = _solveSystemCG( UIntPack< FEMSigs ... >() ,          F , bsData , d , _solution , ( ConstPointer( T ) )_prolongedSolution , d==_maxDepth ? _constraints : _residualConstraints , Dot , iters , coarseToFine ,                                          sStats , solverInfo.showResidual , solverInfo.cgAccuracy , interpolationInfo... );
+				}
+				t = Time();
+				UpdateRestriction( d , interpolationInfo... );
+				sStats.constraintUpdateTime += Time()-t;
+				OutputSolverStats( v , d , sStats , showResidual , actualIters );
+			}
+		}
+	};
+	auto SolveProlongation = [&]( int v , int depth , InterpolationInfo< T , PointDs >* ... interpolationInfo )
+	{
+		sorWeights.sorFunction = solverInfo.sorProlongationFunction;
+		showResidual = true;
+		bool coarseToFine = true;
+		for( LocalDepth d=baseDepth ; d<=depth ; d++ )
+		{
+			sStats.constraintUpdateTime = 0;
+			int iters = solverInfo.iters( v , false , d );
+			t = Time();
+			F.init( d );
+			SetResidualConstraints( d , interpolationInfo... );
+			sStats.constraintUpdateTime += Time()-t;
+			if( d==baseDepth ) _solveRegularMG( UIntPack< FEMSigs ... >() , F , bsData , d , _solution , d==_maxDepth ? _constraints : _residualConstraints , Dot , solverInfo.baseVCycles , iters , sStats , solverInfo.showResidual , solverInfo.cgAccuracy , interpolationInfo... );
+			else
+			{
+				if( d>solverInfo.cgDepth ) actualIters = _solveSystemGS( UIntPack< FEMSigs ... >() , Dim!=1 , F , bsData , d , _solution , ( ConstPointer( T ) )_prolongedSolution , d==_maxDepth ? _constraints : _residualConstraints , Dot , iters , coarseToFine , solverInfo.sliceBlockSize , sorWeights , sStats , solverInfo.showResidual , interpolationInfo... );
+				else                       actualIters = _solveSystemCG( UIntPack< FEMSigs ... >() ,          F , bsData , d , _solution , ( ConstPointer( T ) )_prolongedSolution , d==_maxDepth ? _constraints : _residualConstraints , Dot , iters , coarseToFine , sStats , solverInfo.showResidual , solverInfo.cgAccuracy , interpolationInfo... );
+			}
+			t = Time();
+			UpdateProlongation( d );
+			sStats.constraintUpdateTime += Time()-t;
+			OutputSolverStats( v , d , sStats , showResidual , actualIters );
+		}
+	};
+	// Run the cycles: with wCycle the sweeps are nested over shrinking and then growing depth ranges, otherwise one full restriction is followed by one full prolongation
+	for( int v=0 ; v<solverInfo.vCycles ; v++ )
+	{
+		if( solverInfo.wCycle )
+		{
+			for( int d=maxSolveDepth ; d>baseDepth ; d-- )
+			{
+				SolveRestriction ( v , d   , interpolationInfo ... );
+				SolveProlongation( v , d-1 , interpolationInfo ... );
+			}
+			for( int d=baseDepth+1 ; d<=maxSolveDepth ; d++ )
+			{
+				SolveRestriction ( v , d-1 , interpolationInfo ... );
+				SolveProlongation( v , d   , interpolationInfo ... );
+			}
+		}
+		else
+		{
+			SolveRestriction ( v , maxSolveDepth , interpolationInfo ... );
+			SolveProlongation( v , maxSolveDepth , interpolationInfo ... );
+		}
+		if( solverInfo.showGlobalResidual==SHOW_GLOBAL_RESIDUAL_ALL || ( solverInfo.showGlobalResidual==SHOW_GLOBAL_RESIDUAL_LAST && v==solverInfo.vCycles-1 ) )
+		{
+			bool coarseToFine = false;
+			std::vector< double > rNorms( maxSolveDepth+1 );
+			for( LocalDepth d=maxSolveDepth ; d>=baseDepth ; d-- )
+			{
+				F.init( d );
+				SetResidualConstraints( d , interpolationInfo... );
+				_solveSystemGS( UIntPack< FEMSigs ... >() , Dim!=1 , F , bsData , d , _solution , ( ConstPointer( T ) )_prolongedSolution , d==_maxDepth ? _constraints : _residualConstraints , Dot , 0 , coarseToFine , solverInfo.sliceBlockSize , TrivialSORWeights() , sStats , true , interpolationInfo... );
+				UpdateRestriction( d , interpolationInfo... );
+				rNorms[d] = sqrt( sStats.outRNorm2 / _bNorm2[d] );
+			}
+			printf( "%3d" , v+1 );
+			for( int d=baseDepth ; d<=maxSolveDepth ; d++ ) printf( "\t%.4e" , rNorms[d] );
+			printf( "\n" );
+		}
+	}
+	MemoryUsage();
+
+	FreePointer( _residualConstraints );
+	FreePointer( _restrictedConstraints );
+	FreePointer( _prolongedSolution );
+	FreePointer( _bNorm2 );
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+DenseNodeData< Real , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::initDenseNodeData( UIntPack< FEMSigs ... > ) const
+{
+	// Dense storage of Real values with _sNodes.size() entries, with every byte cleared to zero before it is returned.
+	DenseNodeData< Real , UIntPack< FEMSigs ... > > nodeData( _sNodes.size() ) ; memset( nodeData() , 0 , _sNodes.size()*sizeof(Real) );
+	return nodeData;
+}
+template< unsigned int Dim , class Real >
+template< class Data , unsigned int ... FEMSigs >
+DenseNodeData< Data , UIntPack< FEMSigs ... > > FEMTree< Dim , Real >::initDenseNodeData( UIntPack< FEMSigs ... > ) const
+{
+	// Dense storage of Data values with _sNodes.size() entries, with every byte cleared to zero before it is returned.
+	DenseNodeData< Data , UIntPack< FEMSigs ... > > nodeData( _sNodes.size() ) ; memset( nodeData() , 0 , _sNodes.size()*sizeof(Data) );
+	return nodeData;
+}
+
+template< unsigned int Dim , class Real > template< class SReal , class Data , unsigned int _Dim > Data FEMTree< Dim , Real >::_StencilDot( Point< SReal , _Dim > p1 , Point< Data , _Dim > p2 ){ Data sum={} ; for( int c=0 ; c<_Dim ; ++c ) sum += p2[c] * (Real)p1[c] ; return sum; } // component-wise product, summed over the _Dim components
+template< unsigned int Dim , class Real > template< class SReal , class Data                     > Data FEMTree< Dim , Real >::_StencilDot( Point< SReal , 1 >    p1 , Point< Data , 1 >    p2 ){ const Real scale = (Real)p1[0] ; return p2[0] * scale; } // single-component points
+template< unsigned int Dim , class Real > template< class SReal , class Data                     > Data FEMTree< Dim , Real >::_StencilDot( SReal                 p1 , Point< Data , 1 >    p2 ){ const Real scale = (Real)p1 ; return p2[0] * scale; } // scalar stencil value, single-component data point
+template< unsigned int Dim , class Real > template< class SReal , class Data                     > Data FEMTree< Dim , Real >::_StencilDot( Point< SReal , 1 >    p1 , Data                 p2 ){ const Real scale = (Real)p1[0] ; return p2 * scale; } // single-component stencil point, bare data value
+template< unsigned int Dim , class Real > template< class SReal , class Data                     > Data FEMTree< Dim , Real >::_StencilDot( SReal                 p1 , Data                 p2 ){ const Real scale = (Real)p1 ; return p2 * scale; } // both arguments are bare values
+template< unsigned int Dim , class Real > template< class Real1 , unsigned int _Dim > bool FEMTree< Dim , Real >::_IsZero( Point< Real1 , _Dim > p ){ int c=0 ; while( c<_Dim && _IsZero( p[c] ) ) c++ ; return !( c<_Dim ); } // true only if the scan passed every component without meeting a non-zero one
+template< unsigned int Dim , class Real > template< class Real1 > bool FEMTree< Dim , Real >::_IsZero( Real1 p ){ const bool isZero = ( p==0 ) ; return isZero; } // scalar case: plain comparison against 0
+
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int ... FEMSigs , unsigned int ... CSigs , unsigned int ... FEMDegrees , unsigned int ... CDegrees , unsigned int CDim , class Coefficients >
+void FEMTree< Dim , Real >::_addFEMConstraints( UIntPack< FEMSigs ... > , UIntPack< CSigs ... > , typename BaseFEMIntegrator::template Constraint< UIntPack< FEMDegrees ... > , UIntPack< CDegrees ... > , CDim >& F , const Coefficients& coefficients , Pointer( T ) constraints , LocalDepth maxDepth ) const
+{ // Adds to constraints[] ( indexed by node index ) the values obtained by pairing the integrals supplied by F with the data in coefficients, for depths 0..min( maxDepth , _maxDepth ).
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	_setFEM2ValidityFlags( UIntPack<   CSigs ... >() );
+	typedef typename BaseFEMIntegrator::template Constraint< UIntPack< FEMDegrees ... > , UIntPack< CDegrees ... > , CDim > BaseConstraint;
+	typedef typename Coefficients::data_type D;
+	typedef UIntPack< (  BSplineOverlapSizes< CDegrees , FEMDegrees >::OverlapSize  ) ... >          OverlapSizes;
+	typedef UIntPack< ( -BSplineOverlapSizes< CDegrees , FEMDegrees >::OverlapStart ) ... >  LeftCFEMOverlapRadii;
+	typedef UIntPack< (  BSplineOverlapSizes< CDegrees , FEMDegrees >::OverlapEnd   ) ... > RightCFEMOverlapRadii;
+	typedef UIntPack< ( -BSplineOverlapSizes< FEMDegrees , CDegrees >::OverlapStart ) ... >  LeftFEMCOverlapRadii;
+	typedef UIntPack< (  BSplineOverlapSizes< FEMDegrees , CDegrees >::OverlapEnd   ) ... > RightFEMCOverlapRadii;
+
+	// Same depth: each valid FEM1 node gathers directly from the coefficients stored at its overlapping neighbors.
+	// Coefficients at finer depths: scattered into the scratch buffer _constraints at the parent depth, down-sampled, and folded in when that coarser depth is visited.
+	// Coefficients at coarser depths: handled by the second pass ( only if any were seen ) that up-samples the coefficients and gathers from the parent's neighbors.
+	maxDepth = std::min< LocalDepth >( maxDepth , _maxDepth );
+	Pointer( T ) _constraints = AllocPointer< T >( _sNodesEnd( maxDepth-1 ) ); // scratch buffer covering the nodes of depths below maxDepth; released after the fine-to-coarse pass
+	memset( _constraints , 0 , sizeof(T)*( _sNodesEnd(maxDepth-1) ) );
+	MemoryUsage();
+
+	static const WindowLoopData< UIntPack< BSplineOverlapSizes< CDegrees , FEMDegrees >::OverlapSize ... > > cfemLoopData( []( int c , int* start , int* end ){ BaseFEMIntegrator::ParentOverlapBounds( UIntPack< CDegrees ... >() , UIntPack< FEMDegrees ... >() , c , start , end ); } );
+	static const WindowLoopData< UIntPack< BSplineOverlapSizes< FEMDegrees , CDegrees >::OverlapSize ... > > femcLoopData( []( int c , int* start , int* end ){ BaseFEMIntegrator::ParentOverlapBounds( UIntPack< FEMDegrees ... >() , UIntPack< CDegrees ... >() , c , start , end ); } );
+
+	bool hasCoarserCoefficients = false; // becomes true once a coefficient is found at a depth below maxDepth
+	// Iterate from fine to coarse, setting the constraints @(depth) and the cumulative constraints @(depth-1). The flag above is combined across threads with an OpenMP || reduction so no two threads write the shared variable.
+	for( LocalDepth d=maxDepth ; d>=0 ; d-- )
+	{
+		typename BaseConstraint::CCStencil  stencil;
+		typename BaseConstraint::PCStencils stencils;
+		F.init( d );
+		F.template setStencil < false >( stencil  );
+		F.template setStencils< true  >( stencils );
+		std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() ); // one neighbor key per thread
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
+#pragma omp parallel for reduction( || : hasCoarserCoefficients )
+		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
+		{
+			if( d<maxDepth ) constraints[i] += _constraints[i]; // fold in what the finer depths accumulated for this node
+			ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			FEMTreeNode* node = _sNodes.treeNodes[i];
+			int start[Dim] , end[] = { BSplineOverlapSizes< CDegrees , FEMDegrees >::OverlapSize ... };
+			memset( start , 0 , sizeof( start ) );
+			typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+			neighborKey.getNeighbors( LeftFEMCOverlapRadii() , RightFEMCOverlapRadii() , node , neighbors );
+			bool isInterior , isInterior2; // isInterior is evaluated for the node itself, isInterior2 for its parent; each selects the stencil path over explicit integration
+			{
+				LocalDepth d ; LocalOffset off ; _localDepthAndOffset( node , d , off );
+				isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMDegrees ... >() , UIntPack< CDegrees ... >() , d , off );
+			}
+			{
+				LocalDepth d ; LocalOffset off ; _localDepthAndOffset( node->parent , d , off );
+				isInterior2 = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< CDegrees ... >() , UIntPack< FEMDegrees ... >() , d , off );
+			}
+
+			LocalDepth d ; LocalOffset off; // this d shadows the loop's d for the rest of the iteration
+			_localDepthAndOffset( node , d , off );
+
+			// Set constraints from current depth
+			// Gather the coefficients stored at the neighboring nodes into the constraint stored with node
+			if( _isValidFEM1Node( node ) )
+			{
+				if( isInterior )
+				{
+					unsigned int size = neighbors.neighbors.Size;
+					Pointer( const FEMTreeNode* ) nodes = neighbors.neighbors().data;
+					Pointer( Point< double , CDim > ) stencilValues = stencil.data;
+					for( unsigned int j=0 ; j<size ; j++ )
+					{
+						if( _isValidFEM2Node( nodes[j] ) )
+						{
+							const D* _data = coefficients( nodes[j] );
+							if( _data ) constraints[i] += _StencilDot( stencilValues[j] , *_data );
+						}
+					}
+				}
+				else
+				{
+					unsigned int size = neighbors.neighbors.Size;
+					Pointer( const FEMTreeNode* ) nodes = neighbors.neighbors().data;
+					for( unsigned int j=0 ; j<size ; j++ )
+					{
+						if( _isValidFEM2Node( nodes[j] ) )
+						{
+							const D* _data = coefficients( nodes[j] );
+							if( _data )
+							{
+								LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( nodes[j] , _d , _off );
+								constraints[i] += _StencilDot( F.ccIntegrate( off , _off ) , *_data ); // no stencil on this path: integrate the pair explicitly
+							}
+						}
+					}
+				}
+				BaseFEMIntegrator::ParentOverlapBounds( UIntPack< CDegrees ... >() , UIntPack< FEMDegrees ... >() , d , off , start , end ); // NOTE(review): start/end are not read again in this loop body
+			}
+			if( !_isValidFEM2Node( node ) ) continue;
+			const D* _data = coefficients( node );
+			if( !_data ) continue;
+			else if( d<maxDepth ) hasCoarserCoefficients = true; // writes the thread-private reduction copy
+			const D& data = *_data;
+			if( _IsZero( data ) ) continue;
+
+			// Set the _constraints for the parents: scatter this node's coefficient onto the neighbors of its parent
+			if( d>0 )
+			{
+				int cIdx = (int)( node - node->parent->children );
+				const typename BaseConstraint::CCStencil& _stencil = stencils.data[cIdx];
+				neighborKey.getNeighbors( LeftCFEMOverlapRadii() , RightCFEMOverlapRadii() , node->parent , neighbors );
+
+				unsigned int size = cfemLoopData.size[cIdx];
+				const unsigned int* indices = cfemLoopData.indices[cIdx];
+				ConstPointer( Point< double , CDim > ) stencilValues = _stencil.data;
+				Pointer( const FEMTreeNode* ) nodes = neighbors.neighbors().data;
+				if( isInterior2 )
+				{
+					for( unsigned int i=0 ; i<size ; i++ )
+					{
+						unsigned int idx = indices[i];
+						if( nodes[idx] ) AddAtomic( _constraints[ nodes[idx]->nodeData.nodeIndex ] , _StencilDot( stencilValues[idx] , data ) ); // several threads may target the same entry, hence AddAtomic
+					}
+				}
+				else
+				{
+					for( unsigned int i=0 ; i<size ; i++ )
+					{
+						unsigned int idx = indices[i];
+						if( nodes[idx] )
+						{
+							LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( nodes[idx] , _d , _off );
+							AddAtomic( _constraints[ nodes[idx]->nodeData.nodeIndex ] , _StencilDot( F.pcIntegrate( _off , off ) , data ) );
+						}
+					}
+				}
+			}
+		}
+		if( d>0 && d<maxDepth ) _downSample( UIntPack< FEMSigs ... >() , F.tRestrictionProlongation() , d , _constraints ); // push the accumulated scratch values one level coarser
+		MemoryUsage();
+	}
+	FreePointer( _constraints );
+	if( hasCoarserCoefficients )
+	{
+		Pointer( D ) _coefficients = AllocPointer< D >( _sNodesEnd( maxDepth-1 ) );
+		memset( _coefficients , 0 , sizeof(D) * _sNodesEnd(maxDepth-1) );
+		for( LocalDepth d=maxDepth-1 ; d>=0 ; d-- )
+		{
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEM2Node( _sNodes.treeNodes[i] ) )
+			{
+				const D* d = coefficients( _sNodes.treeNodes[i] ); // this d is the coefficient pointer and shadows the depth inside the body
+				if( d ) _coefficients[i] += *d;
+			}
+		}
+
+		// Coarse-to-fine up-sampling of coefficients
+		for( LocalDepth d=1 ; d<maxDepth ; d++ ) _upSample( UIntPack< FEMSigs ... >() , F.tRestrictionProlongation() , d , _coefficients );
+		// Compute the contribution from all coarser depths
+		for( LocalDepth d=1 ; d<=maxDepth ; d++ )
+		{
+			size_t start = _sNodesBegin( d ) , end = _sNodesEnd( d ) , range = end - start; // NOTE(review): these three are not read below
+			typename BaseConstraint::CPStencils stencils;
+			F.init( d );
+			F.template setStencils< false >( stencils );
+			std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) ); // keys are set for depth d-1 because the lookups below start at node->parent
+
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEM1Node( _sNodes.treeNodes[i] ) )
+			{
+				ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+				FEMTreeNode* node = _sNodes.treeNodes[i];
+				int start[Dim] , end[Dim];
+				typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+				typename FEMTreeNode::template ConstNeighbors< OverlapSizes > pNeighbors;
+				bool isInterior;
+				{
+					BaseFEMIntegrator::ParentOverlapBounds( UIntPack< FEMDegrees ... >() , UIntPack< CDegrees ... >() , (int)( node - node->parent->children ) , start , end );
+				}
+				{
+					LocalDepth d ; LocalOffset off ; _localDepthAndOffset( node->parent , d , off );
+					neighborKey.getNeighbors( LeftFEMCOverlapRadii() , RightFEMCOverlapRadii() , node->parent , pNeighbors );
+					isInterior = BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMDegrees ... >() , UIntPack< CDegrees ... >() , d , off );
+				}
+				int cIdx = (int)( node - node->parent->children );
+				const typename BaseConstraint::CCStencil& _stencil = stencils.data[cIdx];
+
+				T constraint = {}; // accumulated locally; added to constraints[i] once at the end
+
+				LocalDepth d ; LocalOffset off;
+				_localDepthAndOffset( node , d , off );
+				int corner = (int)( node - node->parent->children );
+				unsigned int size = femcLoopData.size[corner];
+				const unsigned int* indices = femcLoopData.indices[corner];
+				Pointer( const FEMTreeNode* ) nodes = pNeighbors.neighbors().data;
+				Pointer( Point< double , CDim > ) stencilValues = _stencil.data;
+				if( isInterior )
+					for( unsigned int i=0 ; i<size ; i++ )
+					{
+						unsigned int idx = indices[i];
+						if( _isValidFEM2Node( nodes[idx] ) ) constraint += _StencilDot( stencilValues[idx] , _coefficients[ nodes[idx]->nodeData.nodeIndex ] );
+					}
+				else
+					for( unsigned int i=0 ; i<size ; i++ )
+					{
+						unsigned int idx = indices[i];
+						if( _isValidFEM2Node( nodes[idx] ) )
+						{
+							LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset ( nodes[idx] , _d , _off );
+							constraint += _StencilDot( F.cpIntegrate( off , _off ) , _coefficients[ nodes[idx]->nodeData.nodeIndex ] );
+						}
+					}
+				constraints[i] += constraint;
+			}
+		}
+		FreePointer( _coefficients );
+	}
+	MemoryUsage();
+}
+
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int ... FEMSigs , unsigned int PointD >
+void FEMTree< Dim , Real >::addInterpolationConstraints( DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth , const InterpolationInfo< T , PointD >& interpolationInfo ) const
+{ // For every interpolation point stored with a node of depth 0..min( maxDepth , _maxDepth ), adds the point's dual values, paired with the evaluated derivative values, to the constraints of the neighboring valid FEM1 nodes.
+	_setFEM1ValidityFlags( UIntPack< FEMSigs ... >() );
+	typedef typename FEMIntegrator::template PointEvaluator< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , PointD > > PointEvaluator;
+	PointEvaluator evaluator( std::min< LocalDepth >( maxDepth , _maxDepth ) );
+
+	typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd ... > , UIntPack< ( -BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ...  > > PointSupportKey;
+	maxDepth = std::min< LocalDepth >( maxDepth , _maxDepth ); // clamp to the depth of the tree
+	{
+		typedef UIntPack< (-BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ... >       LeftSupportRadii;
+		typedef UIntPack< ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportEnd   ) ... >  LeftPointSupportRadii;
+		typedef UIntPack< (-BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportStart ) ... > RightPointSupportRadii;
+		typedef UIntPack<   BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize    ... > SupportSizes;
+
+		for( int d=0 ; d<=maxDepth ; d++ )
+		{
+			std::vector< PointSupportKey > neighborKeys( omp_get_max_threads() ); // one neighbor key per thread, rebuilt for every depth
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( maxDepth ) ); // note: set with maxDepth, not with the current d
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
+			{
+				PointEvaluatorState< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , PointD > > eState;
+				FEMTreeNode* node = _sNodes.treeNodes[i];
+
+				PointSupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+				typename FEMTreeNode::template ConstNeighbors< SupportSizes > neighbors;
+				neighborKey.getNeighbors( LeftPointSupportRadii() , RightPointSupportRadii() , node , neighbors );
+				LocalDepth d ; LocalOffset off; // this d shadows the depth loop variable for the rest of the iteration
+				_localDepthAndOffset( node , d , off );
+
+				size_t begin , end;
+				interpolationInfo.range( node , begin , end ); // [begin,end) indexes the interpolation points associated with this node
+				for( size_t pIndex=begin ; pIndex<end ; pIndex++ )
+				{
+					const DualPointInfo< Dim , Real , T , PointD >& pData = interpolationInfo[ pIndex ];
+					Point< Real , Dim > p = pData.position;
+					evaluator.initEvaluationState( p , d , off , eState );
+
+					int s[Dim]; // written by the update callback below; the visible code never reads it ( the inner loop's s is a separate variable )
+					WindowLoop< Dim >::Run
+					(
+						IsotropicUIntPack< Dim , 0 >() , SupportSizes() ,
+						[&]( int d , int i ){ s[d] = i; } ,
+						[&]( const FEMTreeNode* _node )
+						{
+							if( _isValidFEM1Node( _node ) )
+							{
+								LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( _node , _d , _off );
+								CumulativeDerivativeValues< Real , Dim , PointD > values = eState.template dValues< Real , CumulativeDerivatives< Dim , PointD > >( _off );
+								T dot = {};
+								for( int s=0 ; s<CumulativeDerivatives< Dim , PointD >::Size ; s++ ) dot += pData.dualValues[s] * values[s];
+								AddAtomic( constraints[ _node->nodeData.nodeIndex ] , dot ); // atomic because other threads may add to the same neighbor
+							}
+						} ,
+						neighbors.neighbors()
+					);
+				}
+			}
+		}
+		MemoryUsage();
+	}
+}
+
+template< unsigned int Dim , class Real >
+template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , class Coefficients1 , class Coefficients2 , unsigned int PointD >
+double FEMTree< Dim , Real >::_interpolationDot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot , const InterpolationInfo< T , PointD >* iInfo ) const
+{
+	// Sums, over the interpolation points, the weighted Dot of the cumulative derivative values evaluated from the two coefficient sets.
+	typedef UIntPack< FEMSignature< FEMSigs1 >::Degree ... > FEMDegrees1;
+	typedef UIntPack< FEMSignature< FEMSigs2 >::Degree ... > FEMDegrees2;
+	typedef UIntPack< FEMSigs1 ... > FEMSignatures1;
+	typedef UIntPack< FEMSigs2 ... > FEMSignatures2;
+	// Without interpolation information there is nothing to sum.
+	if( !iInfo ) return 0.;
+	MultiThreadedEvaluator< FEMSignatures1 , PointD , T > evaluator1( this , coefficients1 );
+	MultiThreadedEvaluator< FEMSignatures2 , PointD , T > evaluator2( this , coefficients2 );
+	size_t first , last;
+	iInfo->range( _spaceRoot , first , last );
+	double total = 0;
+#pragma omp parallel for reduction( + : total )
+	for( int s=(int)first ; s<(int)last ; s++ )
+	{
+		const int thread = omp_get_thread_num();
+		Point< Real , Dim > position = (*iInfo)[s].position;
+		Real weight = (*iInfo)[s].weight;
+		CumulativeDerivativeValues< T , Dim , PointD > values1 = (*iInfo)( s , evaluator1.values( position , thread ) );
+		CumulativeDerivativeValues< T , Dim , PointD > values2 = evaluator2.values( position , thread );
+		for( int k=0 ; k<CumulativeDerivatives< Dim , PointD >::Size ; k++ ) total += Dot( values1[k] , values2[k] ) * weight;
+	}
+	return total;
+}
+
+template< unsigned int Dim , class Real >
+template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , unsigned int ... Degrees1 , unsigned int ... Degrees2 , class Coefficients1 , class Coefficients2 >
+double FEMTree< Dim , Real >::_dot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , typename BaseFEMIntegrator::template Constraint< UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , 1 >& F , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot ) const
+{ // Returns the sum, over pairs of overlapping nodes, of Dot( coefficient1 , coefficient2 ) scaled by the integral supplied by F; the pairs are handled in three passes according to the relative depth of the two nodes.
+	_setFEM1ValidityFlags( UIntPack< FEMSigs1 ... >() );
+	_setFEM2ValidityFlags( UIntPack< FEMSigs2 ... >() );
+	typedef typename BaseFEMIntegrator::template Constraint< UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , 1 > BaseConstraint;
+	double dot = 0;
+	// Calculate the contribution from @(depth,depth)
+	{
+		typedef UIntPack<  BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapSize  ... >      OverlapSizes;
+		typedef UIntPack< -BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapStart ... >  LeftOverlapRadii;
+		typedef UIntPack<  BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapEnd   ... > RightOverlapRadii;
+
+		for( LocalDepth d=0 ; d<=_maxDepth ; d++ )
+		{
+			typename BaseConstraint::CCStencil stencil;
+			F.init( d );
+			F.template setStencil< false >( stencil );
+
+			std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() ); // one neighbor key per thread
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
+
+#pragma omp parallel for reduction( + : dot )
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
+			{
+				const FEMTreeNode* node = _sNodes.treeNodes[i];
+				const T* _data1;
+				if( _isValidFEM1Node( node ) && ( _data1=coefficients1(node) ) )
+				{
+					ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+					typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+					neighborKey.getNeighbors( LeftOverlapRadii() , RightOverlapRadii() , node , neighbors );
+					bool isInterior = _isInteriorlyOverlapped( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , node ); // selects the stencil path over explicit integration
+
+					LocalDepth d ; LocalOffset off; // this d shadows the depth loop variable
+					_localDepthAndOffset( node , d , off );
+					ConstPointer( FEMTreeNode * const ) nodes = neighbors.neighbors().data;
+					ConstPointer( Point< double , 1 > ) stencilValues = stencil.data;
+					if( isInterior )
+					{
+						for( int i=0 ; i<WindowSize< UIntPack< BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapSize ... > >::Size ; i++  )
+						{
+							const T* _data2;
+							if( _isValidFEM2Node( nodes[i] ) && ( _data2=coefficients2( nodes[i] ) ) ) dot += Dot( *_data1 , *_data2 ) * stencilValues[i][0];
+						}
+					}
+					else
+					{
+						for( int i=0 ; i<WindowSize< UIntPack< BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapSize ... > >::Size ; i++  )
+						{
+							const T* _data2;
+							if( _isValidFEM2Node( nodes[i] ) && ( _data2=coefficients2( nodes[i] ) ) )
+							{
+								LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( nodes[i] , _d , _off );
+								dot += Dot( *_data1 , *_data2 ) * F.ccIntegrate( off , _off )[0];
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	// Calculate the contribution from @(<depth,depth): coefficients1 accumulated from coarser depths against coefficients2 at depth d
+	{
+		typedef UIntPack<  BSplineOverlapSizes< Degrees2 , Degrees1 >::OverlapSize  ... >      OverlapSizes;
+		typedef UIntPack< -BSplineOverlapSizes< Degrees2 , Degrees1 >::OverlapStart ... >  LeftOverlapRadii;
+		typedef UIntPack<  BSplineOverlapSizes< Degrees2 , Degrees1 >::OverlapEnd   ... > RightOverlapRadii;
+
+		DenseNodeData< T , UIntPack< FEMSigs1 ... > > cumulative1( _sNodesEnd( _maxDepth-1 ) ); // NOTE(review): only the memset below is guarded by _maxDepth>0 - confirm _sNodesEnd( -1 ) is well-defined when _maxDepth==0
+		if( _maxDepth>0 ) memset( cumulative1() , 0 , sizeof(T) * _sNodesEnd( _maxDepth-1 ) );
+
+		for( LocalDepth d=1 ; d<=_maxDepth ; d++ )
+		{
+			// Update the cumulative coefficients with the coefficients @(depth-1)
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d-1) ; i<_sNodesEnd(d-1) ; i++ )
+			{
+				const T* _data1 = coefficients1( _sNodes.treeNodes[i] );
+				if( _data1 ) cumulative1[i] += *_data1;
+			}
+
+			typename BaseConstraint::PCStencils stencils;
+			F.init( d );
+			F.template setStencils< true >( stencils );
+
+			std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) ); // keys are set for depth d-1 because the lookups start at node->parent
+
+#pragma omp parallel for reduction( + : dot )
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
+			{
+				const FEMTreeNode* node = _sNodes.treeNodes[i];
+				const T* _data2;
+				if( _isValidFEM2Node( node ) && ( _data2=coefficients2( node ) ) )
+				{
+					ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+					bool isInterior = _isInteriorlyOverlapped( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , node->parent );
+
+					LocalDepth d ; LocalOffset off;
+					_localDepthAndOffset( node , d , off );
+
+					int cIdx = (int)( node - node->parent->children ); // index of node among its parent's children; selects the per-child stencil
+					typename BaseConstraint::CCStencil& _stencil = stencils.data[cIdx];
+					typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+					neighborKey.getNeighbors( LeftOverlapRadii() , RightOverlapRadii() , node->parent , neighbors );
+
+					int start[Dim] , end[Dim];
+					_SetParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , node , start , end ); // NOTE(review): same ( Degrees1 , Degrees2 ) order as in the @(>depth,depth) pass although OverlapSizes is reversed here - confirm intended
+					WindowLoop< Dim >::Run
+					(
+						start , end ,
+						[&]( int , int ){;} ,
+						[&]( const FEMTreeNode* node , Point< double , 1 > stencilValue )
+						{
+							const T* _data1;
+							if( _isValidFEM1Node( node ) && ( _data1=cumulative1(node) ) )
+							{
+								if( isInterior ) dot += Dot( *_data1 , *_data2 ) * stencilValue[0];
+								else
+								{
+									LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( node , _d , _off );
+									dot += Dot ( *_data1 , *_data2 ) * F.pcIntegrate( _off , off )[0];
+								}
+							}
+						} ,
+						neighbors.neighbors() , _stencil()
+					);
+				}
+			}
+			// Up sample the cumulative coefficients for the next level
+			if( d<_maxDepth ) _upSample( UIntPack< FEMSigs1 ... >() , F.tRestrictionProlongation() , d , cumulative1() );
+		}
+	}
+
+	// Calculate the contribution from @(>depth,depth): coefficients1 at depth d scattered onto coarser nodes, then paired with coefficients2 there
+	{
+	typedef UIntPack<  BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapSize  ... >      OverlapSizes;
+	typedef UIntPack< -BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapStart ... >  LeftOverlapRadii;
+	typedef UIntPack<  BSplineOverlapSizes< Degrees1 , Degrees2 >::OverlapEnd   ... > RightOverlapRadii;
+
+	DenseNodeData< T , UIntPack< FEMSigs2 ... > > cumulative2( _sNodesEnd( _maxDepth-1 ) ); // NOTE(review): same unguarded _sNodesEnd( _maxDepth-1 ) sizing as cumulative1 above
+	if( _maxDepth>0 ) memset( cumulative2() , 0 , sizeof(T) * _sNodesEnd( _maxDepth-1 ) );
+
+		for( LocalDepth d=_maxDepth ; d>0 ; d-- )
+		{
+			typename BaseConstraint::CPStencils stencils;
+			F.init( d );
+			F.template setStencils< false >( stencils );
+
+			std::vector< ConstOneRingNeighborKey > neighborKeys( omp_get_max_threads() );
+			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) );
+
+			// Update the cumulative constraints @(depth-1) from @(depth)
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
+			{
+				const FEMTreeNode* node = _sNodes.treeNodes[i];
+				const T* _data1;
+				if( _isValidFEM1Node( node ) && ( _data1=coefficients1( node ) ) )
+				{
+					ConstOneRingNeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+					bool isInterior = _isInteriorlyOverlapped( UIntPack< Degrees2 ... >() , UIntPack< Degrees1 ... >() , node->parent );
+
+					LocalDepth d ; LocalOffset off;
+					_localDepthAndOffset( node , d , off );
+
+					int cIdx = (int)( node - node->parent->children );
+					typename BaseConstraint::CCStencil& _stencil = stencils.data[cIdx];
+					typename FEMTreeNode::template ConstNeighbors< OverlapSizes > neighbors;
+					neighborKey.getNeighbors( LeftOverlapRadii() , RightOverlapRadii() , node->parent , neighbors );
+
+					int start[Dim] , end[Dim];
+					_SetParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , node , start , end );
+
+#ifdef __clang__
+#pragma message ( "[WARNING] You've got me clang" )
+					std::function< void (int,int) > updateFunction = [](int,int){}; // clang builds pass this std::function in place of the no-op lambda used otherwise
+#endif // __clang__
+
+					WindowLoop< Dim >::Run
+					(
+						start , end ,
+#ifdef __clang__
+						updateFunction ,
+#else // !__clang__
+						[&]( int , int ){;} ,
+#endif // __clang__
+						[&]( const FEMTreeNode* node , Point< double , 1 > stencilValue )
+						{
+						if( _isValidFEM2Node( node ) )
+							{
+								T _dot; // assigned on both branches below before it is used
+								if( isInterior ) _dot = (*_data1) * stencilValue[0];
+								else
+								{
+									LocalDepth _d ; LocalOffset _off ; _localDepthAndOffset( node , _d , _off );
+									_dot = (*_data1) * F.cpIntegrate( off , _off )[0];
+								}
+								AddAtomic( cumulative2[ node->nodeData.nodeIndex ] , _dot ); // atomic because other threads may add to the same coarser node
+							}
+						} ,
+						neighbors.neighbors() , _stencil()
+					);
+				}
+			}
+			// Update the dot-product using the cumulative constraints @(depth-1)
+#pragma omp parallel for reduction( + : dot )
+			for( int i=_sNodesBegin(d-1) ; i<_sNodesEnd(d-1) ; i++ )
+			{
+				const FEMTreeNode* node = _sNodes.treeNodes[i];
+				const T* _data2;
+				if( _isValidFEM2Node( node ) && ( _data2=coefficients2( node ) ) ) dot += Dot( cumulative2[ node->nodeData.nodeIndex ] , *_data2 );
+			}
+
+			// Down-sample the cumulative constraints from @(depth-1) to @(depth-2) for the next pass
+			if( d-1>0 ) _downSample( UIntPack< FEMSigs2 ... >() , F.cRestrictionProlongation() , d-1 , GetPointer( &cumulative2[0] , (int)cumulative2.size() ) );
+		}
+	}
+
+	return dot;
+}
diff --git a/Src/FEMTree.WeightedSamples.inl b/Src/FEMTree.WeightedSamples.inl
new file mode 100644
index 0000000..e33ca8a
--- /dev/null
+++ b/Src/FEMTree.WeightedSamples.inl
@@ -0,0 +1,306 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+template< class Real , unsigned int DataDegree , unsigned int ... DataDegrees > typename std::enable_if< sizeof ... ( DataDegrees )==0 >::type __SetBSplineComponentValues( const Real* position , const Real* start , Real width , double* values , unsigned int stride )
+{	// Terminal overload (no degrees remain after DataDegree): evaluates the components at ( position[0]-start[0] )/width into values; stride is unused here.
+	Polynomial< DataDegree >::BSplineComponentValues( ( *position - *start ) / width , values );
+}
+template< class Real , unsigned int DataDegree , unsigned int ... DataDegrees > typename std::enable_if< sizeof ... ( DataDegrees )!=0 >::type __SetBSplineComponentValues( const Real* position , const Real* start , Real width , double* values , unsigned int stride )
+{	// Recursive overload: fills the leading dimension with DataDegree, then recurses on the remaining degrees with position/start advanced by one and values advanced by stride.
+	Polynomial< DataDegree >::BSplineComponentValues( ( *position - *start ) / width , values );
+	__SetBSplineComponentValues< Real , DataDegrees ... >( position+1 , start+1 , width , values+stride , stride );
+}
+
+
+// Evaluate the result of splatting along a plane and then evaluating at a point on the plane.
+// Concretely: the reciprocal of the sum of squares of the Degree+1 B-spline component values taken at 0.5.
+template< unsigned int Degree > double GetScaleValue( void )
+{
+	double atCenter[Degree+1] , sumOfSquares = 0;
+	Polynomial< Degree >::BSplineComponentValues( 0.5 , atCenter );
+	for( unsigned int c=0 ; c<=Degree ; c++ ) sumOfSquares += atCenter[c] * atCenter[c];
+	return 1./sumOfSquares;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int WeightDegree >
+void FEMTree< Dim , Real >::_addWeightContribution( DensityEstimator< WeightDegree >& densityWeights , FEMTreeNode* node , Point< Real , Dim > position , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , Real weight )
+{	// Adds weight (times ScaleValue), distributed by the tensor product of B-spline component values at position, into densityWeights over the neighbors of node.
+	static const double ScaleValue = GetScaleValue< WeightDegree >();	// function-local static: evaluated once, on the first call
+	double values[ Dim ][ BSplineSupportSizes< WeightDegree >::SupportSize ];	// values[d][i]: i-th component value along dimension d
+	typename FEMTreeNode::template Neighbors< IsotropicUIntPack< Dim , BSplineSupportSizes< WeightDegree >::SupportSize > >& neighbors = weightKey.template getNeighbors< true >( node , nodeAllocator , _NodeInitializer( *this ) );	// NOTE(review): the <true> argument presumably creates missing neighbors through nodeAllocator -- confirm
+
+	densityWeights.reserve( nodeCount() );
+
+	Point< Real , Dim > start;
+	Real w;
+	_startAndWidth( node , start , w );
+
+	for( int dim=0 ; dim<Dim ; dim++ ) Polynomial< WeightDegree >::BSplineComponentValues( ( position[dim]-start[dim] ) / w , values[dim] );	// evaluated at the node-relative coordinate ( position-start )/w
+
+	weight *= (Real)ScaleValue;
+	double scratch[Dim+1];	// scratch[d+1] is the running product through dimension d; scratch[0] seeds it with the scaled weight
+	scratch[0] = weight;
+	WindowLoop< Dim >::Run
+	(
+		IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , BSplineSupportSizes< WeightDegree >::SupportSize >() ,
+		[&]( int d , int i ){ scratch[d+1] = scratch[d] * values[d][i]; } ,
+		[&]( FEMTreeNode* node ){ if( node ) AddAtomic( densityWeights[ node ] , (Real)scratch[Dim] ); } ,	// this `node` is the visited neighbor (it shadows the parameter); null neighbors are skipped
+		neighbors.neighbors()
+	);
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int WeightDegree , class PointSupportKey >
+Real FEMTree< Dim , Real >::_getSamplesPerNode( const DensityEstimator< WeightDegree >& densityWeights , const FEMTreeNode* node , Point< Real , Dim > position , PointSupportKey& weightKey ) const
+{	// Returns the sum, over the neighbors of node, of ( stored density weight ) * ( tensor-product B-spline value at position ).
+	Real weight = 0;
+	typedef typename PointSupportKey::NeighborType Neighbors;
+	double values[ Dim ][ BSplineSupportSizes< WeightDegree >::SupportSize ];	// values[d][i]: i-th component value along dimension d
+	Neighbors neighbors = weightKey.getNeighbors( node );
+	Point< Real , Dim > start;
+	Real w;
+	_startAndWidth( node , start , w );
+
+	for( int dim=0 ; dim<Dim ; dim++ ) Polynomial< WeightDegree >::BSplineComponentValues( ( position[dim]-start[dim] ) / w , values[dim] );	// evaluated at the node-relative coordinate ( position-start )/w
+	double scratch[Dim+1];	// scratch[d+1] is the running product through dimension d
+	scratch[0] = 1;
+	WindowLoop< Dim >::Run
+	(
+		IsotropicUIntPack< Dim , 0 >() , IsotropicUIntPack< Dim , BSplineSupportSizes< WeightDegree >::SupportSize >() ,
+		[&]( int d , int i ){ scratch[d+1] = scratch[d] * values[d][i]; } ,
+		[&]( typename Neighbors::Window::data_type node ){ if( node ){ const Real* w = densityWeights( node ) ; if( w ) weight += (Real)( scratch[Dim] * (*w) ); } } ,	// inner `w` (pointer to the stored weight) shadows the width `w`; neighbors with no stored weight add nothing
+		neighbors.neighbors()
+	);
+	return weight;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int WeightDegree , class PointSupportKey >
+void FEMTree< Dim , Real >::_getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , const FEMTreeNode* node , Point< Real , Dim > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const
+{	// Writes a fractional sample depth for position into depth and the matching weight into weight, starting from node (or its ancestor at the kernel depth).
+	const FEMTreeNode* temp = node;
+	while( _localDepth( temp )>densityWeights.kernelDepth() ) temp = temp->parent;	// climb to the kernel depth if node is deeper
+	weight = _getSamplesPerNode( densityWeights , temp , position , weightKey );
+	if( weight>=(Real)1. ) depth = Real( _localDepth( temp ) + log( weight ) / log(double(1<<( Dim-densityWeights.coDimension() ))) );	// add the logarithm of weight in base 2^(Dim-coDimension)
+	else
+	{
+		Real oldWeight , newWeight;
+		oldWeight = newWeight = weight;
+		while( newWeight<(Real)1. && temp->parent )	// climb until the samples-per-node value reaches 1 or there is no parent left
+		{
+			temp=temp->parent;
+			oldWeight = newWeight;
+			newWeight = _getSamplesPerNode( densityWeights , temp , position , weightKey );
+		}
+		depth = Real( _localDepth( temp ) + log( newWeight ) / log( newWeight / oldWeight ) );	// NOTE(review): if the loop never ran, or newWeight==oldWeight, the divisor is log(1)=0 -- confirm callers cannot reach this
+	}
+	weight = Real( pow( double(1<<( Dim-densityWeights.coDimension() )) , -double(depth) ) );	// weight = ( 2^(Dim-coDimension) )^(-depth)
+}
+template< unsigned int Dim , class Real >
+template< unsigned int WeightDegree , class PointSupportKey >
+void FEMTree< Dim , Real >::_getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , Point< Real , Dim > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const
+{	// Overload without a starting node: descends from _spaceRoot towards position, then defers to the node-based overload.
+	FEMTreeNode* temp;
+	Point< Real,  Dim > myCenter;
+	for( int d=0 ; d<Dim ; d++ ) myCenter[d] = (Real)0.5;	// the descent starts from center 0.5 along every axis and width 1
+	Real myWidth = Real( 1. );
+
+	// Get the finest node with depth less than or equal to the splat depth that contains the point
+	temp = _spaceRoot;
+	while( _localDepth( temp )<densityWeights.kernelDepth() )
+	{
+		if( !IsActiveNode< Dim >( temp->children ) ) break; // ERROR_OUT( "" );
+		int cIndex = FEMTreeNode::ChildIndex( myCenter , position );
+		temp = temp->children + cIndex;
+		myWidth /= 2;	// myWidth is now the child's width
+		for( int d=0 ; d<Dim ; d++ )	// bit d of cIndex selects the direction along axis d; the center moves by half the child's width
+			if( (cIndex>>d) & 1 ) myCenter[d] += myWidth/2;
+			else                  myCenter[d] -= myWidth/2;
+	}
+	return _getSampleDepthAndWeight( densityWeights , temp , position , weightKey , depth , weight );	// returning a void expression is legal C++; depth and weight are written by the callee
+}
+
+template< unsigned int Dim , class Real >
+template< bool CreateNodes , class V , unsigned int ... DataSigs >
+void FEMTree< Dim , Real >::_splatPointData( FEMTreeNode* node , Point< Real , Dim > position , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& dataInfo , PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey )
+{	// Adds v, scaled by the tensor-product B-spline value at position, to dataInfo at every active neighbor of node.
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > SupportSizes;
+	double values[ Dim ][ SupportSizes::Max() ];	// the row length SupportSizes::Max() is also the stride handed to __SetBSplineComponentValues below
+	typename FEMTreeNode::template Neighbors< UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > >& neighbors = dataKey.template getNeighbors< CreateNodes >( node , nodeAllocator , _NodeInitializer( *this ) );
+
+	Point< Real , Dim > start;
+	Real w;
+	_startAndWidth( node , start , w );
+
+	__SetBSplineComponentValues< Real , FEMSignature< DataSigs >::Degree ... >( &position[0] , &start[0] , w , &values[0][0] , SupportSizes::Max() );	// one row of values per dimension, each with its own degree
+	double scratch[Dim+1];	// scratch[d+1] is the running product through dimension d
+	scratch[0] = 1;
+	WindowLoop< Dim >::Run
+	(
+		ZeroUIntPack< Dim >() , UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... >() ,
+		[&]( int d , int i ){ scratch[d+1] = scratch[d] * values[d][i]; } ,
+		[&]( FEMTreeNode* node ){ if( IsActiveNode< Dim >( node ) )	AddAtomic( dataInfo[ node ] , v * (Real)scratch[Dim] ); } ,	// this `node` is the visited neighbor (it shadows the parameter)
+		neighbors.neighbors()
+	);
+}
+template< unsigned int Dim , class Real >
+template< bool CreateNodes , unsigned int WeightDegree , class V , unsigned int ... DataSigs >
+Real FEMTree< Dim , Real >::_splatPointData( const DensityEstimator< WeightDegree >& densityWeights , Point< Real , Dim > position , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& dataInfo , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey , LocalDepth minDepth , LocalDepth maxDepth , int dim , Real depthBias )
+{
+	// Splats v at the fractional depth estimated from the density at position: a fraction dx goes to the node at depth
+	// ceil(depth) and the remaining 1-dx to its parent. Returns the weight computed by _getSampleDepthAndWeight.
+	double dx , width;
+	V _v;
+	FEMTreeNode* temp;
+	Point< Real , Dim > myCenter;
+	for( int d=0 ; d<Dim ; d++ ) myCenter[d] = (Real)0.5;
+	Real myWidth = (Real)1.;
+
+	temp = _spaceRoot;
+	while( _localDepth( temp )<densityWeights.kernelDepth() )	// descend towards position, at most to the kernel depth
+	{
+		if( !IsActiveNode< Dim >( temp->children ) ) break;
+		int cIndex = FEMTreeNode::ChildIndex( myCenter , position );
+		temp = temp->children + cIndex;
+		myWidth /= 2;
+		for( int d=0 ; d<Dim ; d++ )
+			if( (cIndex>>d) & 1 ) myCenter[d] += myWidth/2;
+			else                  myCenter[d] -= myWidth/2;
+	}
+	Real weight , depth;
+	_getSampleDepthAndWeight( densityWeights , temp , position , weightKey , depth , weight );
+	depth += depthBias;
+
+	if( depth<minDepth ) depth = Real(minDepth);	// clamp the biased depth to [ minDepth , maxDepth ]
+	if( depth>maxDepth ) depth = Real(maxDepth);
+	int topDepth = int(ceil(depth));
+
+	dx = 1.0-(topDepth-depth);	// fraction of the contribution assigned to the node at topDepth
+	if     ( topDepth<=minDepth ) topDepth = minDepth , dx = 1;
+	else if( topDepth> maxDepth ) topDepth = maxDepth , dx = 1;
+
+	while( _localDepth( temp )>topDepth ) temp=temp->parent;
+	while( _localDepth( temp )<topDepth )	// refine down to topDepth, creating children where they do not exist yet
+	{
+		if( !temp->children ) temp->initChildren( nodeAllocator , _NodeInitializer( *this ) );
+		int cIndex = FEMTreeNode::ChildIndex( myCenter , position );
+		temp = &temp->children[cIndex];
+		myWidth/=2;
+		for( int d=0 ; d<Dim ; d++ )
+			if( (cIndex>>d) & 1 ) myCenter[d] += myWidth/2;
+			else                  myCenter[d] -= myWidth/2;
+	}
+	width = 1.0 / ( 1<<_localDepth( temp ) );
+	_v = v * weight / Real( pow( width , dim ) ) * Real( dx );
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+	_splatPointData< CreateNodes , V >( temp , position , _v , dataInfo , dataKey );
+#else // !__GNUC__ || __GNUC__ >=5
+	_splatPointData< CreateNodes , V ,  DataSigs ... >( temp , position , _v , dataInfo , dataKey );
+#endif // __GNUC__ && __GNUC__ < 5
+	if( fabs(1.0-dx) > 1e-6 )	// only splat into the parent when its share is non-negligible
+	{
+		dx = Real(1.0-dx);
+		temp = temp->parent;
+		width = 1.0 / ( 1<<_localDepth( temp ) );
+
+		_v = v * weight / Real( pow( width , dim ) ) * Real( dx );
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+		_splatPointData< CreateNodes , V >( temp , position , _v , dataInfo , dataKey );
+#else // !__GNUC__ || __GNUC__ >=5
+		_splatPointData< CreateNodes , V , DataSigs ... >( temp , position , _v , dataInfo , dataKey );
+#endif // __GNUC__ && __GNUC__ < 5
+	}
+	return weight;
+}
+template< unsigned int Dim , class Real >
+template< bool CreateNodes , unsigned int WeightDegree , class V , unsigned int ... DataSigs >
+Real FEMTree< Dim , Real >::_multiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , FEMTreeNode* node , Point< Real , Dim > position , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& dataInfo , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey , int dim )
+{	// Splats v (times the density weight, or 1 when densityWeights is NULL) into the neighbors of node and of each ancestor with non-negative local depth; returns that weight.
+	typedef UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > SupportSizes;
+	Real _depth , weight;
+	if( densityWeights ) _getSampleDepthAndWeight( *densityWeights , position , weightKey , _depth , weight );	// _depth is computed but not used afterwards
+	else weight = (Real)1.;
+	V _v = v * weight;
+
+	double values[ Dim ][ SupportSizes::Max() ];
+	dataKey.template getNeighbors< CreateNodes >( node , nodeAllocator , _NodeInitializer( *this ) );	// return value unused; judging by the dataKey.neighbors[...] lookups below this presumably fills every ancestor level too -- TODO confirm
+
+	for( FEMTreeNode* _node=node ; _localDepth( _node )>=0 ; _node=_node->parent )
+	{
+		V __v = _v * (Real)pow( 1<<_localDepth( _node ) , dim );	// per-level scale ( 2^depth )^dim
+		Point< Real , Dim > start;
+		Real w;
+		_startAndWidth( _node , start , w );
+		__SetBSplineComponentValues< Real , FEMSignature< DataSigs >::Degree ... >( &position[0] , &start[0] , w , &values[0][0] , SupportSizes::Max() );
+		typename FEMTreeNode::template Neighbors< UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... > >& neighbors = dataKey.neighbors[ _localToGlobal( _localDepth( _node ) ) ];
+		double scratch[Dim+1];	// scratch[d+1] is the running product through dimension d
+		scratch[0] = 1.;
+		WindowLoop< Dim >::Run
+		(
+			ZeroUIntPack< Dim >() , UIntPack< BSplineSupportSizes< FEMSignature< DataSigs >::Degree >::SupportSize ... >() ,
+			[&]( int d , int i ){ scratch[d+1] = scratch[d] * values[d][i]; } ,
+			[&]( FEMTreeNode* node ){ if( IsActiveNode< Dim >( node ) ) dataInfo[ node ] += __v * (Real)scratch[Dim];	} ,	// plain += rather than AddAtomic -- NOTE(review): presumably not meant for concurrent calls on one dataInfo; confirm
+			neighbors.neighbors()
+		);
+	}
+	return weight;
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int WeightDegree , class V , unsigned int ... DataSigs >
+Real FEMTree< Dim , Real >::_nearestMultiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , FEMTreeNode* node , Point< Real , Dim > position , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& dataInfo , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , int dim )
+{
+	// Adds v (times the density weight, or 1 without an estimator) scaled by ( 2^depth )^dim to node and to each active ancestor of non-negative local depth.
+	Real weight = (Real)1. , sampleDepth;
+	if( densityWeights ) _getSampleDepthAndWeight( *densityWeights , position , weightKey , sampleDepth , weight );
+	V scaled = v * weight;
+	for( FEMTreeNode* cur=node ; _localDepth( cur )>=0 ; cur=cur->parent )
+		if( IsActiveNode< Dim >( cur ) ) dataInfo[ cur ] += scaled * (Real)pow( 1<<_localDepth( cur ) , dim );
+	return weight;
+}
+//////////////////////////////////
+// MultiThreadedWeightEvaluator //
+//////////////////////////////////
+template< unsigned int Dim , class Real >
+template< unsigned int DensityDegree >
+FEMTree< Dim , Real >::MultiThreadedWeightEvaluator< DensityDegree >::MultiThreadedWeightEvaluator( const FEMTree< Dim , Real >* tree , const DensityEstimator< DensityDegree >& density , int threads ) : _density( density ) , _tree( tree )
+{	// Keeps one neighbor key per thread (at least one), each set to the global depth of the density kernel.
+	_threads = threads<1 ? 1 : threads;
+	_neighborKeys.resize( _threads );
+	for( size_t k=0 ; k<_neighborKeys.size() ; k++ ) _neighborKeys[k].set( tree->_localToGlobal( density.kernelDepth() ) );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int DensityDegree >
+Real FEMTree< Dim , Real >::MultiThreadedWeightEvaluator< DensityDegree >::weight( Point< Real , Dim > p , int thread )
+{
+	// Evaluates the density-derived weight at p using the neighbor key stored for `thread`; the estimated depth is discarded.
+	Real sampleDepth , sampleWeight;
+	_tree->_getSampleDepthAndWeight( _density , p , _neighborKeys[thread] , sampleDepth , sampleWeight );
+	return sampleWeight;
+}
diff --git a/Src/FEMTree.h b/Src/FEMTree.h
new file mode 100644
index 0000000..a15a254
--- /dev/null
+++ b/Src/FEMTree.h
@@ -0,0 +1,2622 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+// -- [TODO] Make as many of the functions (related to the solver) const as possible.
+// -- [TODO] Move the point interpolation constraint scaling by 1<<maxDepth
+// -- [TODO] Add support for staggered-grid test functions
+// -- [TODO] Store signatures with constraints/systems/restriction-prolongations
+// -- [TODO] Make a virtual evaluation that only needs to know the degree
+// -- [TODO] Modify (public) functions so that template parameters don't need to be passed when they are called
+// -- [TODO] Confirm that whenever _isValidFEM*Node is called, the flags have already been set.
+// -- [TODO] Make weight evaluation more efficient in _getSamplesPerNode by reducing the number of calls to getNeighbors
+
+// -- [TODO] For point evaluation:
+//        1. Have the evaluator store stencils for all depths [DONE]
+//        2. When testing centers/corners, don't use generic evaluation
+
+#ifndef FEM_TREE_INCLUDED
+#define FEM_TREE_INCLUDED
+
+#define VERSION "10.07"
+#define MEMORY_ALLOCATOR_BLOCK_SIZE ( 1<<12 )	// parenthesized so the shift keeps its meaning when the macro is used inside a larger expression
+
+#define NEW_CODE
+
+#include <atomic>
+#include "MyMiscellany.h"
+#include "BSplineData.h"
+#include "Geometry.h"
+#include "PointStream.h"
+#include "RegularTree.h"
+#include "SparseMatrix.h"
+#include <functional>
+#include <string>
+
+
+template< unsigned int Dim , class Real > class FEMTree;
+
+enum	// Indices into ShowGlobalResidualNames
+{
+	SHOW_GLOBAL_RESIDUAL_NONE ,
+	SHOW_GLOBAL_RESIDUAL_LAST ,
+	SHOW_GLOBAL_RESIDUAL_ALL  ,
+	SHOW_GLOBAL_RESIDUAL_COUNT	// number of enumerators above, not an option itself
+};
+static const char* ShowGlobalResidualNames[] = { "show none" , "show last" , "show all" };	// internal linkage: this header defines the array, so external linkage would clash when two translation units include it
+
+// Per-node payload: an integer index plus one char of bit flags.
+class FEMTreeNodeData
+{
+public:
+	// Bit masks stored in flags
+	enum
+	{
+		SPACE_FLAG = 1<<0 , FEM_FLAG_1 = 1<<1 , FEM_FLAG_2 = 1<<2 , REFINABLE_FLAG = 1<<3 ,
+		GHOST_FLAG = 1<<7
+	};
+	int nodeIndex;
+	mutable char flags;	// mutable so the ghost bit can be changed through a const object
+	// Sets ( f==true ) or clears ( f==false ) the ghost bit, leaving the other bits untouched
+	void setGhostFlag( bool f ) const { flags = (char)( f ? ( flags | GHOST_FLAG ) : ( flags & ~GHOST_FLAG ) ); }
+	bool getGhostFlag( void ) const { return ( flags & GHOST_FLAG ) ? true : false; }	// true iff the ghost bit is set
+	FEMTreeNodeData( void );
+	~FEMTreeNodeData( void );
+};
+
+template< unsigned int Dim >
+class SortedTreeNodes
+{
+	typedef RegularTreeNode< Dim , FEMTreeNodeData > TreeNode;
+protected:
+	Pointer( Pointer( int ) ) _sliceStart;	// _sliceStart[depth][slice] is read for slice in [ 0 , 1<<depth ]; the accessors below use it as an index
+	int _levels;
+public:
+	Pointer( TreeNode* ) treeNodes;
+	int begin( int depth ) const { return _sliceStart[depth][0]; }	// start index at this depth
+	int   end( int depth ) const { return _sliceStart[depth][(size_t)1<<depth]; }	// end index at this depth
+	int begin( int depth , int slice ) const { return _sliceStart[depth][ slice<0 ? 0 : ( slice>(1<<depth) ? (1<<depth) : slice ) ]; }	// slice is clamped to [ 0 , 1<<depth ]
+	int   end( int depth , int slice ) const { return begin( depth , slice+1 ); }	// the end of a slice is the (clamped) begin of the next one
+	int size( void ) const { return _sliceStart[_levels-1][(size_t)1<<(_levels-1)]; }	// end index of the last level
+	int size( int depth ) const { if(depth<0||depth>=_levels) printf( "uhoh\n" ); return _sliceStart[depth][(size_t)1<<depth] - _sliceStart[depth][0]; }	// NOTE(review): an out-of-range depth only prints "uhoh"; the lookup still happens
+	int size( int depth , int slice ) const { return end( depth , slice ) - begin( depth , slice ); }
+	int levels( void ) const { return _levels; }
+
+	SortedTreeNodes( void );
+	~SortedTreeNodes( void );
+	void set( TreeNode& root , std::vector< int >* map );	// both set() overloads are defined elsewhere
+	size_t set( TreeNode& root );
+};
+
+template< typename T > struct DotFunctor{};	// primary template is empty; the specializations below provide operator() and dimension()
+template< > struct DotFunctor< float >
+{
+	double operator()( float  v1 , float  v2 ){ return v1*v2; }	// the product is formed in float and then converted to double
+	unsigned int dimension( void ) const { return 1; }
+};
+template< > struct DotFunctor< double >
+{
+	double operator()( double v1 , double v2 ){ return v1*v2; }
+	unsigned int dimension( void ) const { return 1; }
+};
+template< class Real , unsigned int Dim > struct DotFunctor< Point< Real , Dim > >
+{
+	double operator()( Point< Real , Dim > v1 , Point< Real , Dim > v2 ){ return Point< Real , Dim >::Dot( v1 , v2 ); }	// delegates to Point::Dot
+	unsigned int dimension( void ) const { return Dim; }
+};
+
+template< typename Pack > struct SupportKey{ };	// specialized below for UIntPack< Degrees ... >: a NeighborKey with per-dimension radii ( -SupportStart , SupportEnd )
+template< unsigned int ... Degrees >
+struct SupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template NeighborKey< UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart) ... > , UIntPack< BSplineSupportSizes< Degrees >::SupportEnd ... > >
+{
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > LeftRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd   ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportSize  ) ... > Sizes; 
+};
+template< typename Pack > struct ConstSupportKey{ };	// as SupportKey, but derived from ConstNeighborKey
+template< unsigned int ... Degrees >
+struct ConstSupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template ConstNeighborKey< UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > , UIntPack< BSplineSupportSizes< Degrees >::SupportEnd ... > >
+{
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > LeftRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd   ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportSize  ) ... > Sizes; 
+};
+template< typename Pack > struct OverlapKey{ };	// NeighborKey with radii ( -OverlapStart , OverlapEnd ) taken from BSplineOverlapSizes of each degree paired with itself
+template< unsigned int ... Degrees >
+struct OverlapKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template NeighborKey< UIntPack< (-BSplineOverlapSizes< Degrees , Degrees >::OverlapStart ) ... > , UIntPack< BSplineOverlapSizes< Degrees , Degrees >::OverlapEnd ... > >
+{
+	typedef UIntPack< (-BSplineOverlapSizes< Degrees , Degrees >::OverlapStart ) ... > LeftRadii;
+	typedef UIntPack< ( BSplineOverlapSizes< Degrees , Degrees >::OverlapEnd   ) ... > RightRadii;
+	typedef UIntPack< ( BSplineOverlapSizes< Degrees , Degrees >::OverlapSize  ) ... > Sizes; 
+};
+template< typename Pack > struct ConstOverlapKey{ };	// as OverlapKey, but derived from ConstNeighborKey
+template< unsigned int ... Degrees >
+struct ConstOverlapKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template ConstNeighborKey< UIntPack< (-BSplineOverlapSizes< Degrees , Degrees >::OverlapStart ) ... > , UIntPack< BSplineOverlapSizes< Degrees , Degrees >::OverlapEnd ... > >
+{
+	typedef UIntPack< (-BSplineOverlapSizes< Degrees , Degrees >::OverlapStart ) ... > LeftRadii;
+	typedef UIntPack< ( BSplineOverlapSizes< Degrees , Degrees >::OverlapEnd   ) ... > RightRadii;
+	typedef UIntPack< ( BSplineOverlapSizes< Degrees , Degrees >::OverlapSize  ) ... > Sizes; 
+};
+
+template< typename Pack > struct PointSupportKey{ };	// radii are swapped relative to SupportKey: left = SupportEnd , right = -SupportStart
+template< unsigned int ... Degrees >
+struct PointSupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template NeighborKey< UIntPack< BSplineSupportSizes< Degrees >::SupportEnd ... > , UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > >
+{
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd   ) ... > LeftRadii;
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd - BSplineSupportSizes< Degrees >::SupportStart + 1 ) ... > Sizes; 
+};
+template< typename Pack > struct ConstPointSupportKey{ };	// as PointSupportKey, but derived from ConstNeighborKey
+template< unsigned int ... Degrees >
+struct ConstPointSupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template ConstNeighborKey< UIntPack< BSplineSupportSizes< Degrees >::SupportEnd ... > , UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > >
+{
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd   ) ... > LeftRadii;
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::SupportStart ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::SupportEnd - BSplineSupportSizes< Degrees >::SupportStart + 1 ) ... > Sizes; 
+};
+
+template< typename Pack > struct CornerSupportKey{ };	// NeighborKey with radii left = BCornerEnd , right = -BCornerStart+1
+template< unsigned int ... Degrees >
+struct CornerSupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template NeighborKey< UIntPack< BSplineSupportSizes< Degrees >::BCornerEnd ... > , UIntPack< ( -BSplineSupportSizes< Degrees >::BCornerStart + 1 ) ... > >
+{
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::BCornerEnd       ) ... > LeftRadii;
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::BCornerStart + 1 ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::BCornerSize  + 1 ) ... > Sizes; 
+};
+template< typename Pack > struct ConstCornerSupportKey{ };	// as CornerSupportKey, but derived from ConstNeighborKey
+template< unsigned int ... Degrees >
+struct ConstCornerSupportKey< UIntPack< Degrees ... > > : public RegularTreeNode< sizeof...(Degrees) , FEMTreeNodeData >::template ConstNeighborKey< UIntPack< BSplineSupportSizes< Degrees >::BCornerEnd ... > , UIntPack< ( -BSplineSupportSizes< Degrees >::BCornerStart + 1 ) ... > >
+{
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::BCornerEnd       ) ... > LeftRadii;
+	typedef UIntPack< (-BSplineSupportSizes< Degrees >::BCornerStart + 1 ) ... > RightRadii;
+	typedef UIntPack< ( BSplineSupportSizes< Degrees >::BCornerSize  + 1 ) ... > Sizes; 
+};
+
+// This represents a vector that can only grow in size, stored as a table of fixed-size blocks ( 1<<LogBlockSize elements each ).
+// It has the property that once a reference to an element is returned, that reference remains valid until the vector is destroyed:
+// growing only appends blocks (and may replace the table of block pointers); existing elements are never moved.
+template< typename T , unsigned int LogBlockSize=10 , unsigned InitialBlocks=10 , unsigned int AllocationMultiplier=2 >
+struct BlockedVector
+{
+	BlockedVector( T defaultValue=T() ) : _defaultValue( defaultValue )	// reserves InitialBlocks table slots but allocates no block yet
+	{
+		_reservedBlocks = InitialBlocks;
+		_blocks = NewPointer< Pointer( T ) >( _reservedBlocks );
+		for( size_t i=0 ; i<_reservedBlocks ; i++ ) _blocks[i] = NullPointer( Pointer( T ) );
+		_allocatedBlocks = _size = 0;
+	}
+	~BlockedVector( void )
+	{
+		for( size_t i=0 ; i<_allocatedBlocks ; i++ ) DeletePointer( _blocks[i] );
+		DeletePointer( _blocks );
+	}
+	BlockedVector( const BlockedVector& v )	// deep copy; NOTE(review): blocks are duplicated with memcpy, which presumes T is trivially copyable
+	{
+		_reservedBlocks = v._reservedBlocks , _allocatedBlocks = v._allocatedBlocks , _size = v._size , _defaultValue = v._defaultValue;
+		_blocks = NewPointer< Pointer( T ) >( _reservedBlocks );
+		for( size_t i=0 ; i<_allocatedBlocks ; i++ )
+		{
+			_blocks[i] = NewPointer< T >( _BlockSize );
+			memcpy( _blocks[i] , v._blocks[i] , sizeof(T)*_BlockSize );
+		}
+		for( size_t i=_allocatedBlocks ; i<_reservedBlocks ; i++ ) _blocks[i] = NullPointer( Pointer ( T ) );
+	}
+	BlockedVector& operator = ( const BlockedVector&  v )
+	{
+		if( this==&v ) return *this;	// self-assignment would otherwise free the blocks before copying from them
+		for( size_t i=0 ; i<_allocatedBlocks ; i++ ) DeletePointer( _blocks[i] );
+		DeletePointer( _blocks );
+		_reservedBlocks = v._reservedBlocks , _allocatedBlocks = v._allocatedBlocks , _size = v._size , _defaultValue = v._defaultValue;
+		_blocks = NewPointer< Pointer( T ) >( _reservedBlocks );
+		for( size_t i=0 ; i<_allocatedBlocks ; i++ )
+		{
+			_blocks[i] = NewPointer< T >( _BlockSize );
+			memcpy( _blocks[i] , v._blocks[i] , sizeof(T)*_BlockSize );
+		}
+		for( size_t i=_allocatedBlocks ; i<_reservedBlocks ; i++ ) _blocks[i] = NullPointer( Pointer ( T ) );
+		return *this;
+	}
+	BlockedVector( BlockedVector&& v )	// steals the block table and leaves v empty with a null table
+	{
+		_reservedBlocks = v._reservedBlocks , _allocatedBlocks = v._allocatedBlocks , _size = v._size , _defaultValue = v._defaultValue , _blocks = v._blocks;
+		v._reservedBlocks = v._allocatedBlocks = v._size = 0 , v._blocks = NullPointer( Pointer( T ) );
+	}
+	BlockedVector& operator = ( BlockedVector&& v )
+	{
+		if( this==&v ) return *this;	// self-move would otherwise free the blocks and then reset the vector to empty
+		for( size_t i=0 ; i<_allocatedBlocks ; i++ ) DeletePointer( _blocks[i] );
+		DeletePointer( _blocks );
+		_reservedBlocks = v._reservedBlocks , _allocatedBlocks = v._allocatedBlocks , _size = v._size , _defaultValue = v._defaultValue , _blocks = v._blocks;
+		v._reservedBlocks = v._allocatedBlocks = v._size = 0 , v._blocks = NullPointer( Pointer( T ) );
+		return *this;
+	}
+
+	size_t size( void ) const { return _size; }
+	const T& operator[]( size_t idx ) const { return _blocks[idx>>LogBlockSize][idx&_Mask]; }	// high bits select the block, low LogBlockSize bits the slot; no bounds check
+	T& operator[]( size_t idx ){ return _blocks[idx>>LogBlockSize][idx&_Mask]; }
+
+	// Grows the vector to hold `size` elements; every newly allocated block is filled with the default value.
+	// Returns the index of the last element ( size-1 ). If size does not exceed the current size, warns and returns the current size instead.
+	size_t resize( size_t size ){ return resize( size , _defaultValue ); }
+	size_t resize( size_t size , const T& defaultValue )
+	{
+		if( size<=_size )
+		{
+			WARN( "BlockedVector::resize: new size must be greater than old size: %llu > %llu" , (unsigned long long)size , (unsigned long long)_size );	// casts make the arguments match %llu on every platform
+			return _size;
+		}
+		size_t index = size-1;
+		size_t block = index >> LogBlockSize;
+
+		// If there are insufficiently many blocks, grow the table of block pointers (the blocks themselves stay where they are)
+		if( block>=_reservedBlocks )
+		{
+			size_t newReservedSize = std::max< size_t >( _reservedBlocks * AllocationMultiplier , block+1 );
+			Pointer( Pointer( T ) ) __blocks = NewPointer< Pointer( T ) >( newReservedSize );
+			memcpy( __blocks , _blocks , sizeof( Pointer( T ) ) * _reservedBlocks );
+			for( size_t i=_reservedBlocks ; i<newReservedSize ; i++ ) __blocks[i] = NullPointer( Pointer( T )  );
+			Pointer( Pointer( T ) ) _oldBlocks = _blocks;
+			_blocks = __blocks;
+			_reservedBlocks = newReservedSize;
+			DeletePointer( _oldBlocks );
+		}
+
+		// If the block hasn't been allocated, allocate it and every block before it that is still missing
+		if( block>=_allocatedBlocks )
+		{
+			for( size_t b=_allocatedBlocks ; b<=block ; b++ )
+			{
+				_blocks[b] = NewPointer< T >( _BlockSize );
+				for( size_t i=0 ; i<_BlockSize ; i++ ) _blocks[b][i] = defaultValue;
+			}
+			_allocatedBlocks = block+1;
+		}
+		_size = index+1;
+		return index;
+	}
+	size_t push( void ){ return resize( _size+1 ); }	// appends one element and returns its index
+
+protected:
+	static const size_t _BlockSize = (size_t)1<<LogBlockSize;	// shift performed in size_t rather than int
+	static const size_t _Mask = ( (size_t)1<<LogBlockSize )-1;
+
+	T _defaultValue;
+	size_t _allocatedBlocks , _reservedBlocks;	// blocks actually allocated / slots available in the block table
+	size_t _size;
+	Pointer( Pointer( T ) ) _blocks;
+};
+
+
+template< class Data , typename Pack > struct _SparseOrDenseNodeData{};	// primary template is empty; the UIntPack specialization below is the abstract interface
+template< class Data , unsigned int ... FEMSigs >
+struct _SparseOrDenseNodeData< Data , UIntPack< FEMSigs ... > >	// NOTE(review): declares virtual functions but no virtual destructor -- confirm it is never deleted through a base pointer
+{
+	static const unsigned int Dim = sizeof ... ( FEMSigs );	// one signature per dimension
+	typedef UIntPack< FEMSigs ... > FEMSignatures;
+	typedef Data data_type;
+
+	virtual size_t size( void ) const = 0;
+	virtual const Data& operator[] ( int idx ) const = 0;	// access by integer index
+	virtual Data& operator[] ( int idx ) = 0;
+
+	virtual Data& operator[]( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) = 0;	// access by node, returning a reference
+	virtual Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) = 0;	// access by node, returning a pointer
+	virtual const Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) const = 0;
+};
+
+template< class Data , typename Pack > struct SparseNodeData{};
+// Sparse per-node storage.
+// _indices maps a node's nodeIndex to a slot in _data, with -1 marking "no data for this node";
+// _data holds only the entries that have actually been requested.
+template< class Data , unsigned int ... FEMSigs >
+struct SparseNodeData< Data , UIntPack< FEMSigs ... > > : public _SparseOrDenseNodeData< Data , UIntPack< FEMSigs ... > >
+{
+	static const unsigned int Dim = sizeof ... ( FEMSigs );
+
+	// Number of data entries that have been allocated
+	size_t size( void ) const
+	{
+		return _data.size();
+	}
+	// Direct access to the idx-th allocated entry (idx is a slot in _data, not a node index)
+	const Data& operator[] ( int idx ) const
+	{
+		return _data[idx];
+	}
+	Data& operator[] ( int idx )
+	{
+		return _data[idx];
+	}
+
+	// Makes sure the index table covers at least sz nodes; new slots are marked unassigned (-1)
+	void reserve( size_t sz )
+	{
+		if( _indices.size()<sz ) _indices.resize( sz , -1 );
+	}
+	// Returns a pointer to the node's data, or NULL if the node index is out of range or has no data assigned
+	Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node )
+	{
+		const int nodeIdx = node->nodeData.nodeIndex;
+		if( nodeIdx<0 || nodeIdx>=(int)_indices.size() ) return NULL;
+		const int dataIdx = _indices[ nodeIdx ];
+		if( dataIdx<0 ) return NULL;
+		return &_data[ dataIdx ];
+	}
+	const Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) const
+	{
+		const int nodeIdx = node->nodeData.nodeIndex;
+		if( nodeIdx<0 || nodeIdx>=(int)_indices.size() ) return NULL;
+		const int dataIdx = _indices[ nodeIdx ];
+		if( dataIdx<0 ) return NULL;
+		return &_data[ dataIdx ];
+	}
+	// Returns the node's data, creating the index-table slot and the data entry on first access.
+	// Each growth step is tested once outside and once more inside the named critical section.
+	Data& operator[]( const RegularTreeNode< Dim , FEMTreeNodeData >* node )
+	{
+		const int nodeIdx = node->nodeData.nodeIndex;
+
+		// The index table does not reach this node yet
+		if( nodeIdx>=(int)_indices.size() )
+		{
+#pragma omp critical( SparseNodeData__operator )
+			{
+				if( nodeIdx>=(int)_indices.size() ) _indices.resize( nodeIdx+1 , -1 );
+			}
+		}
+
+		// The node has no data entry yet
+		if( _indices[ nodeIdx ]==-1 )
+		{
+#pragma omp critical( SparseNodeData__operator )
+			{
+				if( _indices[ nodeIdx ]==-1 ) _indices[ nodeIdx ] = (int)_data.push();
+			}
+		}
+
+		return _data[ _indices[ nodeIdx ] ];
+	}
+	// Returns the slot in _data assigned to the node, or -1 for a null node / out-of-range node index
+	int index( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) const
+	{
+		const bool inRange = node && node->nodeData.nodeIndex>=0 && node->nodeData.nodeIndex<(int)_indices.size();
+		return inRange ? _indices[ node->nodeData.nodeIndex ] : -1;
+	}
+
+protected:
+	template< unsigned int _Dim , class _Real > friend class FEMTree;
+	// Rebuilds the index table after nodes have been renumbered.
+	// newNodeIndices[i] is the new index of the old i-th node; entries that are negative or not below newNodeCount are dropped.
+	void _remapIndices( const int* newNodeIndices , unsigned int newNodeCount )
+	{
+		BlockedVector< int > remapped;
+		remapped.resize( newNodeCount );
+		for( int n=0 ; n<(int)newNodeCount ; n++ ) remapped[n] = -1;
+		for( size_t old=0 ; old<(int)_indices.size() ; old++ )
+		{
+			const int target = newNodeIndices[old];
+			if( target<0 || target>=(int)newNodeCount ) continue;
+			remapped[ target ] = _indices[ old ];
+		}
+		_indices = remapped;
+	}
+	BlockedVector< int > _indices;
+	BlockedVector< Data > _data;
+};
+
+template< class Data , typename Pack > struct DenseNodeData{};
+// Dense per-node storage: one Data entry per node index, kept in a single buffer of _sz elements.
+// Copies, reads and writes move the entries as raw bytes (memcpy / fread / fwrite).
+template< class Data , unsigned int ... FEMSigs >
+struct DenseNodeData< Data , UIntPack< FEMSigs ... > > : public _SparseOrDenseNodeData< Data , UIntPack< FEMSigs ... > >
+{
+	static const unsigned int Dim = sizeof ... ( FEMSigs );
+	DenseNodeData( void ) { _data = NullPointer( Data ) ; _sz = 0; }
+	DenseNodeData( size_t sz ){ _sz = sz ; if( sz ) _data = NewPointer< Data >( sz ) ; else _data = NullPointer( Data ); }
+	DenseNodeData( const DenseNodeData&  d ) : DenseNodeData() { _resize( d._sz ) ; if( _sz ) memcpy( _data , d._data , sizeof(Data) * _sz ); }
+	// Move construction steals the buffer and leaves the source empty
+	DenseNodeData(       DenseNodeData&& d ){ _data = d._data , _sz = d._sz ; d._data = NullPointer( Data ) , d._sz = 0; }
+	DenseNodeData& operator = ( const DenseNodeData&  d )
+	{
+		// Self-assignment must be a no-op: _resize() releases the buffer, which would otherwise also be the copy source
+		if( this!=&d )
+		{
+			_resize( d._sz );
+			if( _sz ) memcpy( _data , d._data , sizeof(Data) * _sz );
+		}
+		return *this;
+	}
+	// Move assignment swaps buffers, so the old buffer is released when d is destroyed
+	DenseNodeData& operator = (       DenseNodeData&& d ){ size_t __sz = _sz ; Pointer( Data ) __data = _data ; _data = d._data , _sz = d._sz ; d._data = __data , d._sz = __sz ; return *this; }
+	~DenseNodeData( void ){ DeletePointer( _data ) ; _sz = 0; }
+	// Writes the number of signatures followed by the signatures themselves
+	static void WriteSignatures( FILE* fp )
+	{
+		unsigned int dim = sizeof ... ( FEMSigs );
+		fwrite( &dim , sizeof(unsigned int) , 1 , fp );
+		unsigned int femSigs[] = { FEMSigs ... };
+		fwrite( femSigs , sizeof(unsigned int) , dim , fp );
+	}
+	// Writes the element count followed by the raw element bytes
+	void write( FILE* fp ) const { fwrite( &_sz , sizeof(size_t) , 1 , fp ) ; fwrite( _data , sizeof(Data) , _sz  , fp ); }
+	// Reads back what write() produced, replacing the current contents
+	void read( FILE* fp )
+	{
+		size_t sz = 0;
+		if( fread( &sz , sizeof(size_t) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read size" );
+		// _resize() releases any buffer already owned (the previous contents used to leak) and allocates only for a non-zero size
+		_resize( sz );
+		if( _sz && fread ( _data , sizeof(Data) , _sz  , fp )!=_sz ) ERROR_OUT( "failed to read data" );
+	}
+
+	Data& operator[] ( int idx ) { return _data[idx]; }
+	const Data& operator[] ( int idx ) const { return _data[idx]; }
+	size_t size( void ) const { return _sz; }
+	// Unchecked access by node: the node index must lie in [0,_sz)
+	Data& operator[]( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) { return _data[ node->nodeData.nodeIndex ]; }
+	// Checked access by node: NULL for a null node or a node index outside [0,_sz)
+	Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) { return ( node==NULL || node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_sz ) ? NULL : &_data[ node->nodeData.nodeIndex ]; }
+	const Data* operator()( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) const { return ( node==NULL || node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_sz ) ? NULL : &_data[ node->nodeData.nodeIndex ]; }
+	// Returns the node index if it addresses a stored entry, -1 otherwise (the element count is _sz; _data is a plain buffer)
+	int index( const RegularTreeNode< Dim , FEMTreeNodeData >* node ) const { return ( !node || node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_sz ) ? -1 : node->nodeData.nodeIndex; }
+	// Raw access to the underlying buffer
+	Pointer( Data ) operator()( void ) { return _data; }
+	ConstPointer( Data ) operator()( void ) const { return ( ConstPointer( Data ) )_data; }
+protected:
+	template< unsigned int _Dim , class _Real > friend class FEMTree;
+	// Rebuilds the buffer after nodes have been renumbered.
+	// newNodeIndices[i] is the new index of the old i-th node; entries that are negative or not below newNodeCount are dropped,
+	// and new slots that receive no old entry are left as zero bytes.
+	void _remapIndices( const int* newNodeIndices , size_t newNodeCount )
+	{
+		Pointer( Data ) newData = NewPointer< Data >( newNodeCount );
+		memset( newData , 0 , sizeof(Data)*newNodeCount );
+		for( size_t i=0 ; i<_sz ; i++ ) if( newNodeIndices[i]>=0 && newNodeIndices[i]<newNodeCount ) newData[ newNodeIndices[i] ] = _data[i];
+		DeletePointer( _data );
+		_data = newData;
+		_sz = newNodeCount;
+	}
+	size_t _sz;
+	// Releases the current buffer and allocates a fresh (uninitialized) one of sz elements, or none when sz is zero
+	void _resize( size_t sz ){ DeletePointer( _data ) ; if( sz ) _data = NewPointer< Data >( sz ) ; else _data = NullPointer( Data ) ; _sz = sz; }
+	Pointer( Data ) _data;
+};
+// Tag naming the floating-point type associated with a tree; ReadFEMTreeParameter reads one of these from a file.
+// FEM_TREE_REAL_COUNT is not a real type: as the last enumerator it equals the number of types listed before it.
+enum FEMTreeRealType
+{
+	FEM_TREE_REAL_FLOAT ,
+	FEM_TREE_REAL_DOUBLE ,
+	FEM_TREE_REAL_COUNT
+};
+// Printable names, listed in the same order as the FEMTreeRealType enumerators.
+const char* FEMTreeRealNames[] = { "float" , "double" };
+
+// Reads, as raw binary and in this order, the real-type tag and the dimension from fp.
+// A short read of either value is reported through ERROR_OUT.
+void ReadFEMTreeParameter( FILE* fp , FEMTreeRealType& realType , int &dimension )
+{
+	const size_t gotRealType = fread( &realType , sizeof(FEMTreeRealType) , 1 , fp );
+	if( gotRealType!=1 ) ERROR_OUT( "Failed to read real type" );
+
+	const size_t gotDimension = fread( &dimension , sizeof(int) , 1 , fp );
+	if( gotDimension!=1 ) ERROR_OUT( "Failed to read dimension" );
+}
+// Reads a signature count into dim, then that many unsigned ints into an array allocated with new[].
+// The array is returned and is not freed here. Short reads are reported through ERROR_OUT.
+unsigned int* ReadDenseNodeDataSignatures( FILE* fp , unsigned int &dim )
+{
+	const size_t gotDim = fread( &dim , sizeof(unsigned int) , 1 , fp );
+	if( gotDim!=1 ) ERROR_OUT( "Failed to read dimension" );
+
+	unsigned int* signatures = new unsigned int[dim];
+	const size_t gotSignatures = fread( signatures , sizeof(unsigned int) , dim , fp );
+	if( gotSignatures!=dim ) ERROR_OUT( "Failed to read signatures" );
+	return signatures;
+}
+
+// The Derivative method needs static members:
+//		Dim: the dimensionality of the space in which derivatives are evaluated
+//		Size: the total number of derivatives
+// and static methods:
+//		Index: takes the number of partials along each dimension and returns the index
+//		Factor: takes an index and sets the number of partials along each dimension
+
+template< typename T > struct TensorDerivatives{ };
+template< class Real , typename T > struct TensorDerivativeValues{ };
+
+// Specify the derivatives for each dimension separately:
+// axis k may carry 0..D_k partials, and a combination is flattened with the first axis varying slowest.
+template< unsigned int D , unsigned int ... Ds >
+struct TensorDerivatives< UIntPack< D , Ds ... > >
+{
+	// The same construction applied to the remaining axes
+	typedef TensorDerivatives< UIntPack< Ds ... > > _TensorDerivatives;
+	// The maximum derivative count of the last axis in the pack
+	static const int LastDerivative = UIntPack< D , Ds ... >::template Get< sizeof ... (Ds) >();
+	static const int Dim = _TensorDerivatives::Dim + 1;
+	static const unsigned int Size = _TensorDerivatives::Size * ( D+1 );
+	// Splits a flat index into the number of partials along each axis
+	static void Factor( unsigned int idx , unsigned int derivatives[Dim] )
+	{
+		const unsigned int tailSize = _TensorDerivatives::Size;
+		derivatives[0] = idx / tailSize;
+		_TensorDerivatives::Factor( idx % tailSize , derivatives+1 );
+	}
+	// Flattens the number of partials along each axis into a single index
+	static unsigned int Index( const unsigned int derivatives[Dim] )
+	{
+		const unsigned int tailSize = _TensorDerivatives::Size;
+		return _TensorDerivatives::Index( derivatives+1 ) + tailSize * derivatives[0];
+	}
+};
+// Terminal case: a single axis, where the flat index is the derivative count itself
+template< unsigned int D >
+struct TensorDerivatives< UIntPack< D > >
+{
+	static const int LastDerivative = D;
+	static const int Dim = 1;
+	static const unsigned int Size = D+1;
+	static void Factor( unsigned int idx , unsigned int derivatives[1] )
+	{
+		derivatives[0] = idx;
+	}
+	static unsigned int Index( const unsigned int derivatives[1] )
+	{
+		return derivatives[0];
+	}
+};
+// One value per derivative combination enumerated by TensorDerivatives
+template< class Real , unsigned int ... Ds > struct TensorDerivativeValues< Real , UIntPack< Ds ... > > : public Point< Real , TensorDerivatives< UIntPack< Ds ... > >::Size >{ };
+
+// Specify the sum of the derivatives:
+// CumulativeDerivatives< Dim , D > enumerates the derivatives whose total order is at most D.
+// Flat indices [ 0 , Size of order D-1 ) are the derivatives of total order below D; the remaining Dim^D indices are
+// the derivatives of total order exactly D, where each of the D base-Dim digits of the within-order index names an
+// axis that is differentiated once (so mixed partials appear once per ordering of the axes).
+template< unsigned int Dim , unsigned int D >
+struct CumulativeDerivatives
+{
+	typedef CumulativeDerivatives< Dim , D-1 > _CumulativeDerivatives;
+	static const int LastDerivative = D;
+	static const unsigned int Size = _CumulativeDerivatives::Size * Dim + 1;
+	// Splits a flat index into the number of partials along each axis
+	static void Factor( unsigned int idx , unsigned int d[Dim] )
+	{
+		if( idx<_CumulativeDerivatives::Size ) return _CumulativeDerivatives::Factor( idx , d );
+		else _Factor( idx - _CumulativeDerivatives::Size , d );
+	}
+	// Returns a flat index whose Factor() is the given per-axis derivative count.
+	// Reports an error (and returns (unsigned int)-1 if ERROR_OUT returns) when the total order exceeds D.
+	static unsigned int Index( const unsigned int derivatives[Dim] )
+	{
+		unsigned int dCount = 0;
+		for( unsigned int d=0 ; d<Dim ; d++ ) dCount += derivatives[d];
+		// A total order of exactly D is valid and lives in the last Dim^D indices; only dCount>D is an error
+		if     ( dCount<D  ) return _CumulativeDerivatives::Index( derivatives );
+		else if( dCount==D ) return _CumulativeDerivatives::Size + _Index( derivatives );
+		ERROR_OUT( "More derivatives than allowed" );
+		return -1;
+	}
+protected:
+	// Number of derivatives of total order exactly D ( Dim^D )
+	static const unsigned int _Size = _CumulativeDerivatives::_Size * Dim;
+	// Within-order factorization: the leading base-Dim digit picks one axis to differentiate, the rest recurses
+	static void _Factor( unsigned int idx , unsigned int d[Dim] )
+	{
+		_CumulativeDerivatives::_Factor( idx % _CumulativeDerivatives::_Size , d );
+		d[ idx / _CumulativeDerivatives::_Size ]++;
+	}
+	// Within-order index of a derivative of total order exactly D.
+	// Peels one partial off the first differentiated axis and appends that axis as the lowest digit; the recursion
+	// must use the within-order index of the remainder (not its cumulative Index) for _Factor() to invert the result.
+	static unsigned int _Index( const unsigned int d[Dim] )
+	{
+		unsigned int _d[Dim];
+		memcpy( _d , d , sizeof(_d) );
+		for( unsigned int i=0 ; i<Dim ; i++ ) if( _d[i] )
+		{
+			_d[i]--;
+			return _CumulativeDerivatives::_Index( _d ) * Dim + i;
+		}
+		ERROR_OUT( "No derivatives specified" );
+		return -1;
+	}
+	friend CumulativeDerivatives< Dim , D+1 >;
+};
+// Terminal case: order zero, i.e. the function value alone
+template< unsigned int Dim >
+struct CumulativeDerivatives< Dim , 0 >
+{
+	static const int LastDerivative = 0;
+	static const unsigned int Size = 1;
+	static void Factor( unsigned int idx , unsigned int d[Dim] ){ memset( d , 0 , sizeof(unsigned int)*Dim ); }
+	static unsigned int Index( const unsigned int derivatives[Dim] ){ return 0; }
+protected:
+	static const unsigned int _Size = 1;
+	static void _Factor( unsigned int idx , unsigned int d[Dim] ){ memset( d , 0 , sizeof(unsigned int)*Dim ); }
+	// The single order-zero derivative has within-order index 0
+	static unsigned int _Index( const unsigned int d[Dim] ){ return 0; }
+	friend CumulativeDerivatives< Dim , 1 >;
+};
+// One value per derivative enumerated by CumulativeDerivatives< Dim , D >
+template< typename Real , unsigned int Dim , unsigned int D > using CumulativeDerivativeValues = Point< Real , CumulativeDerivatives< Dim , D >::Size >;
+
+
+// Fills one value per derivative enumerated by CumulativeDerivatives< Dim , D >.
+// Each value is the product, over the axes, of dValues[axis][k] where k is the number of partials that
+// Factor() assigns to that axis; the product is accumulated in double and converted to Real at the end.
+template< unsigned int Dim , class Real , unsigned int D >
+CumulativeDerivativeValues< Real , Dim , D > Evaluate( const double dValues[Dim][D+1] )
+{
+	typedef CumulativeDerivatives< Dim , D > Derivatives;
+
+	CumulativeDerivativeValues< Real , Dim , D > result;
+	unsigned int partials[Dim];
+	for( int idx=0 ; idx<Derivatives::Size ; idx++ )
+	{
+		Derivatives::Factor( idx , partials );
+
+		double product = dValues[0][ partials[0] ];
+		for( int axis=1 ; axis<Dim ; axis++ )
+		{
+			product *= dValues[axis][ partials[axis] ];
+		}
+		result[idx] = (Real)product;
+	}
+	return result;
+}
+
+// A point sample carrying a position, a scalar weight and a set of cumulative derivative values.
+// The arithmetic operators act member-wise so that samples can be accumulated and scaled.
+template< unsigned int Dim , class Real , typename T , unsigned int D >
+struct DualPointInfo
+{
+	Point< Real , Dim > position;
+	Real weight;
+	CumulativeDerivativeValues< T , Dim , D > dualValues;
+	DualPointInfo  operator +  ( const DualPointInfo& p ) const { return DualPointInfo( position + p.position , dualValues + p.dualValues , weight + p.weight ); }
+	DualPointInfo& operator += ( const DualPointInfo& p ){ position += p.position ; weight += p.weight , dualValues += p.dualValues ; return *this; }
+	// The constructor takes ( position , dualValues , weight ): the scaled members must be passed in that order
+	// (the weight and the dual values were previously swapped, which does not match the constructor's parameter types).
+	DualPointInfo  operator *  ( Real s ) const { return DualPointInfo( position*s , dualValues*s , weight*s ); }
+	DualPointInfo& operator *= ( Real s ){ position *= s , weight *= s , dualValues *= s ; return *this; }
+	DualPointInfo  operator /  ( Real s ) const { return DualPointInfo( position/s , dualValues/s , weight/s ); }
+	DualPointInfo& operator /= ( Real s ){ position /= s , weight /= s , dualValues /= s ; return *this; }
+	// Default construction sets the weight to zero; position and dualValues are default-constructed
+	DualPointInfo( void ) : weight(0) { }
+	DualPointInfo( Point< Real , Dim > p , CumulativeDerivativeValues< T , Dim , D > c , Real w ) { position = p , dualValues = c , weight = w; }
+};
+// A DualPointInfo paired with a user data value; every arithmetic operator is applied to both parts.
+template< unsigned int Dim , class Real , typename Data , typename T , unsigned int D >
+struct DualPointAndDataInfo
+{
+	DualPointInfo< Dim , Real , T , D > pointInfo;
+	Data data;
+
+	// Value-returning arithmetic
+	DualPointAndDataInfo operator + ( const DualPointAndDataInfo& p ) const
+	{
+		return DualPointAndDataInfo( pointInfo + p.pointInfo , data + p.data );
+	}
+	DualPointAndDataInfo operator * ( Real s ) const
+	{
+		return DualPointAndDataInfo( pointInfo * s , data * s );
+	}
+	DualPointAndDataInfo operator / ( Real s ) const
+	{
+		return DualPointAndDataInfo( pointInfo / s , data / s );
+	}
+
+	// In-place arithmetic
+	DualPointAndDataInfo& operator += ( const DualPointAndDataInfo& p )
+	{
+		pointInfo += p.pointInfo;
+		data += p.data;
+		return *this;
+	}
+	DualPointAndDataInfo& operator *= ( Real s )
+	{
+		pointInfo *= s;
+		data *= s;
+		return *this;
+	}
+	DualPointAndDataInfo& operator /= ( Real s )
+	{
+		pointInfo /= s;
+		data /= s;
+		return *this;
+	}
+
+	DualPointAndDataInfo( void ){ }
+	DualPointAndDataInfo( DualPointInfo< Dim , Real , T , D > p , Data d )
+	{
+		pointInfo = p;
+		data = d;
+	}
+};
+// A fixed group of 2^Dim DualPointInfo slots with member-wise arithmetic.
+// finalize() compacts the slots with positive weight to the front and records how many there are.
+template< unsigned int Dim , class Real , typename T , unsigned int D >
+struct DualPointInfoBrood
+{
+	// Start with a well-defined count: size() may be queried, and broods are returned by value from the
+	// arithmetic operators, before finalize() has ever been called.
+	DualPointInfoBrood( void ) : _size(0) { }
+
+	DualPointInfo< Dim , Real , T , D >& operator[]( size_t idx ){ return _dpInfo[idx]; }
+	const DualPointInfo< Dim , Real , T , D >& operator[]( size_t idx ) const { return _dpInfo[idx]; }
+	// Moves every slot whose weight is positive to the front (preserving order) and sets the count accordingly
+	void finalize( void ){ _size = 0 ; for( int i=0 ; i<(1<<Dim) ; i++ ) if( _dpInfo[i].weight>0 ) _dpInfo[_size++] = _dpInfo[i]; }
+	// Number of slots kept by the last finalize() (zero if finalize() has not been called on this object)
+	unsigned int size( void ) const { return _size; }
+
+	// Slot-wise arithmetic over all 2^Dim slots; the count of the returned brood is zero until it is finalized
+	DualPointInfoBrood  operator +  ( const DualPointInfoBrood& p ) const { DualPointInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] + p._dpInfo[i] ;  return d; }
+	DualPointInfoBrood  operator *  ( Real s )                      const { DualPointInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] * s            ;  return d; }
+	DualPointInfoBrood  operator /  ( Real s )                      const { DualPointInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] / s            ;  return d; }
+	DualPointInfoBrood& operator += ( const DualPointInfoBrood& p ){ for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] += p._dpInfo[i] ; return *this; }
+	DualPointInfoBrood& operator *= ( Real s )                     { for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] *= s            ; return *this; }
+	DualPointInfoBrood& operator /= ( Real s )                     { for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] /= s            ; return *this; }
+protected:
+	DualPointInfo< Dim , Real , T , D > _dpInfo[1<<Dim];
+	unsigned int _size;
+};
+// A fixed group of 2^Dim DualPointAndDataInfo slots with member-wise arithmetic.
+// finalize() compacts the slots whose point weight is positive to the front and records how many there are.
+template< unsigned int Dim , class Real , typename Data , typename T , unsigned int D >
+struct DualPointAndDataInfoBrood
+{
+	// Start with a well-defined count: size() may be queried, and broods are returned by value from the
+	// arithmetic operators, before finalize() has ever been called.
+	DualPointAndDataInfoBrood( void ) : _size(0) { }
+
+	DualPointAndDataInfo< Dim , Real , Data , T , D >& operator[]( size_t idx ){ return _dpInfo[idx]; }
+	const DualPointAndDataInfo< Dim , Real , Data , T , D >& operator[]( size_t idx ) const { return _dpInfo[idx]; }
+	// Moves every slot whose point weight is positive to the front (preserving order) and sets the count accordingly
+	void finalize( void ){ _size = 0 ; for( int i=0 ; i<(1<<Dim) ; i++ ) if( _dpInfo[i].pointInfo.weight>0 ) _dpInfo[_size++] = _dpInfo[i]; }
+	// Number of slots kept by the last finalize() (zero if finalize() has not been called on this object)
+	unsigned int size( void ) const { return _size; }
+
+	// Slot-wise arithmetic over all 2^Dim slots; the count of the returned brood is zero until it is finalized
+	DualPointAndDataInfoBrood  operator +  ( const DualPointAndDataInfoBrood& p ) const { DualPointAndDataInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] + p._dpInfo[i] ;  return d; }
+	DualPointAndDataInfoBrood  operator *  ( Real s )                             const { DualPointAndDataInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] * s            ;  return d; }
+	DualPointAndDataInfoBrood  operator /  ( Real s )                             const { DualPointAndDataInfoBrood d ; for( int i=0 ; i<(1<<Dim) ; i++ ) d._dpInfo[i] = _dpInfo[i] / s            ;  return d; }
+	DualPointAndDataInfoBrood& operator += ( const DualPointAndDataInfoBrood& p ){ for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] += p._dpInfo[i] ; return *this; }
+	DualPointAndDataInfoBrood& operator *= ( Real s )                            { for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] *= s ; return *this; }
+	DualPointAndDataInfoBrood& operator /= ( Real s )                            { for( int i=0 ; i<(1<<Dim) ; i++ ) _dpInfo[i] /= s ; return *this; }
+protected:
+	DualPointAndDataInfo< Dim , Real , Data , T , D > _dpInfo[1<<Dim];
+	unsigned int _size;
+};
+
+
+////////////////////////////
+// The virtual integrator //
+////////////////////////////
+struct BaseFEMIntegrator
+{
+	template< typename TDegreePack                                            > struct                  System{};
+	template< typename TDegreePack                                            > struct RestrictionProlongation{};
+	template< typename TDegreePack , typename CDegreePack , unsigned int CDim > struct              Constraint{};
+	template< typename TDegreePack                                            > struct        SystemConstraint{};
+	template< typename TDegreePack                                            > struct          PointEvaluator{};
+
+protected:
+	template< unsigned int Degree , unsigned int ... Degrees >
+	static typename std::enable_if< sizeof ... ( Degrees )==0 , bool >::type _IsInteriorlySupported( UIntPack< Degree , Degrees ... > , unsigned int depth , const int off[] )
+	{
+		int begin , end;
+		BSplineSupportSizes< Degree >::InteriorSupportedSpan( depth , begin , end );
+		return off[0]>=begin && off[0]<end;
+	}
+	template< unsigned int Degree , unsigned int ... Degrees >
+	static typename std::enable_if< sizeof ... ( Degrees )!=0 , bool >::type _IsInteriorlySupported( UIntPack< Degree , Degrees ... > , unsigned int depth , const int off[] )
+	{
+		int begin , end;
+		BSplineSupportSizes< Degree >::InteriorSupportedSpan( depth , begin , end );
+		return ( off[0]>=begin && off[0]<end ) && _IsInteriorlySupported( UIntPack< Degrees ... >() , depth , off+1 );
+	}
+	template< unsigned int Degree , unsigned int ... Degrees >
+	static typename std::enable_if< sizeof ... ( Degrees )==0 , bool >::type _IsInteriorlySupported( UIntPack< Degree , Degrees ... > , unsigned int depth , const int off[] , const double begin[] , const double end[] )
+	{
+		int res = 1<<depth;
+		double b = ( 0. + off[0] + BSplineSupportSizes< Degree >::SupportStart ) / res;
+		double e = ( 1. + off[0] + BSplineSupportSizes< Degree >::SupportEnd   ) / res; 
+		return b>=begin[0] && e<=end[0];
+	}
+	template< unsigned int Degree , unsigned int ... Degrees >
+	static typename std::enable_if< sizeof ... ( Degrees )!=0 , bool >::type _IsInteriorlySupported( UIntPack< Degree , Degrees ... > , unsigned int depth , const int off[] , const double begin[] , const double end[] )
+	{
+		int res = 1<<depth;
+		double b = ( 0. + off[0] + BSplineSupportSizes< Degree >::SupportStart ) / res;
+		double e = ( 1. + off[0] + BSplineSupportSizes< Degree >::SupportEnd   ) / res; 
+		return b>=begin[0] && e<=end[0] && _IsInteriorlySupported( UIntPack< Degrees ... >() , depth , off+1 , begin+1 , end+1 );
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )==0 >::type _InteriorOverlappedSpan( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , int depth , int begin[] , int end[] )
+	{
+		BSplineIntegrationData< FEMDegreeAndBType< Degree1 , BOUNDARY_NEUMANN >::Signature , FEMDegreeAndBType< Degree2 , BOUNDARY_NEUMANN >::Signature >::InteriorOverlappedSpan( depth , begin[0] , end[0] );
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )!=0 >::type _InteriorOverlappedSpan( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , int depth , int begin[] , int end[] )
+	{
+		BSplineIntegrationData< FEMDegreeAndBType< Degree1 , BOUNDARY_NEUMANN >::Signature , FEMDegreeAndBType< Degree2 , BOUNDARY_NEUMANN >::Signature >::InteriorOverlappedSpan( depth , begin[0] , end[0] );
+		_InteriorOverlappedSpan( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , begin+1 , end+1 );
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )==0 , bool >::type _IsInteriorlyOverlapped( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , unsigned int depth , const int off[] )
+	{
+		int begin , end;
+		BSplineIntegrationData< FEMDegreeAndBType< Degree1 , BOUNDARY_NEUMANN >::Signature , FEMDegreeAndBType< Degree2 , BOUNDARY_NEUMANN >::Signature >::InteriorOverlappedSpan( depth , begin , end );
+		return off[0]>= begin && off[0]<end;
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )!=0 , bool >::type _IsInteriorlyOverlapped( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , unsigned int depth , const int off[] )
+	{
+		int begin , end;
+		BSplineIntegrationData< FEMDegreeAndBType< Degree1 , BOUNDARY_NEUMANN >::Signature , FEMDegreeAndBType< Degree2 , BOUNDARY_NEUMANN >::Signature >::InteriorOverlappedSpan( depth , begin , end );
+		return ( off[0]>= begin && off[0]<end ) && _IsInteriorlyOverlapped( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , off+1 );
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )==0 >::type _ParentOverlapBounds( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , unsigned int depth , const int off[] , int start[] , int end[] )
+	{
+		const int OverlapStart = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart;
+		start[0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[ off[0] & 1 ] - OverlapStart;
+		end  [0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [ off[0] & 1 ] - OverlapStart + 1;
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )!=0 >::type _ParentOverlapBounds( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , unsigned int depth , const int off[] , int start[] , int end[] )
+	{
+		const int OverlapStart = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart;
+		start[0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[ off[0] & 1 ] - OverlapStart;
+		end  [0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [ off[0] & 1 ] - OverlapStart + 1;
+		_ParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , off+1 , start+1 , end+1 );
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )==0 >::type _ParentOverlapBounds( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , int corner , int start[] , int end[] )
+	{
+		const int OverlapStart = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart;
+		start[0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[ corner & 1 ] - OverlapStart;
+		end  [0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [ corner & 1 ] - OverlapStart + 1;
+	}
+	template< unsigned int Degree1 , unsigned int ... Degrees1 , unsigned int Degree2 , unsigned int ... Degrees2 >
+	static typename std::enable_if< sizeof ... ( Degrees1 )!=0 >::type _ParentOverlapBounds( UIntPack< Degree1 , Degrees1 ... > , UIntPack< Degree2 , Degrees2 ... > , int corner , int start[] , int end[] )
+	{
+		const int OverlapStart = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart;
+		start[0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapStart[ corner & 1 ] - OverlapStart;
+		end  [0] = BSplineOverlapSizes< Degree1 , Degree2 >::ParentOverlapEnd  [ corner & 1 ] - OverlapStart + 1;
+		_ParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , corner>>1 , start+1 , end+1 );
+	}
+
+public:
+	template< unsigned int ... Degrees >
+	static bool IsInteriorlySupported( UIntPack< Degrees ... > , int depth , const int offset[] ){ return depth>=0 && _IsInteriorlySupported( UIntPack< Degrees ... >() , depth , offset ); }
+	template< unsigned int ... Degrees >
+	static bool IsInteriorlySupported( UIntPack< Degrees ... > , int depth , const int offset[] , const double begin[] , const double end[] ){ return depth>=0 && _IsInteriorlySupported( UIntPack< Degrees ... >() , depth , offset , begin , end ); }
+
+	template< unsigned int ... Degrees1 , unsigned int ... Degrees2 >
+	static void InteriorOverlappedSpan( UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , int depth , int begin[] , int end[] )
+	{
+		static_assert( sizeof ... ( Degrees1 ) == sizeof ... ( Degrees2 ) , "[ERROR] Dimensions don't match" );
+		_InteriorOverlappedSpan( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , begin , end );
+	}
+	template< unsigned int ... Degrees1 , unsigned int ... Degrees2 >
+	static bool IsInteriorlyOverlapped( UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , int depth , const int offset[] )
+	{
+		static_assert( sizeof ... ( Degrees1 ) == sizeof ... ( Degrees2 ) , "[ERROR] Dimensions don't match" );
+		return depth>=0 && _IsInteriorlyOverlapped( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , offset );
+	}
+
+	template< unsigned int ... Degrees1 , unsigned int ... Degrees2 >
+	static void ParentOverlapBounds( UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , int depth , const int offset[] , int start[] , int end[] )
+	{
+		static_assert( sizeof ... ( Degrees1 ) == sizeof ... ( Degrees2 ) , "[ERROR] Dimensions don't match" );
+		if( depth>0 ) _ParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , depth , offset , start , end );
+	}
+	template< unsigned int ... Degrees1 , unsigned int ... Degrees2 >
+	static void ParentOverlapBounds( UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , int corner , int start[] , int end[] )
+	{
+		static_assert( sizeof ... ( Degrees1 ) == sizeof ... ( Degrees2 ) , "[ERROR] Dimensions don't match" );
+		_ParentOverlapBounds( UIntPack< Degrees1 ... >() , UIntPack< Degrees2 ... >() , corner , start , end );
+	}
+
+	template< unsigned int Dim >
+	struct PointEvaluatorState
+	{
+		virtual double value( const int offset[] , const unsigned int d[] ) const = 0;
+		virtual double subValue( const int offset[] , const unsigned int d[] ) const = 0;
+		template< class Real , typename DerivativeType >
+		Point< Real , DerivativeType::Size > dValues( const int offset[] ) const
+		{
+			Point< Real , DerivativeType::Size > v;
+			unsigned int _d[Dim];
+			for( int d=0 ; d<DerivativeType::Size ; d++ )
+			{
+				DerivativeType::Factor( d , _d );
+				v[d] = (Real)value( offset , _d );
+			}
+			return v;
+		}
+		template< class Real , typename DerivativeType >
+		Point< Real , DerivativeType::LastDerivative+1 > partialDotDValues( Point< Real , DerivativeType::Size > v , const int offset[] ) const
+		{
+			Point< Real , DerivativeType::LastDerivative+1 > dot;
+			unsigned int _d[Dim];
+			for( int d=0 ; d<DerivativeType::Size ; d++ )
+			{
+				DerivativeType::Factor( d , _d );
+				dot[ _d[Dim-1] ] += (Real)( subValue( offset , _d ) * v[d] );
+			}
+			return dot;
+		}
+	};
+
+	template< unsigned int ... TDegrees >
+	struct PointEvaluator< UIntPack< TDegrees ... > >
+	{
+		static const unsigned int Dim = sizeof ... ( TDegrees );
+	};
+
+	template< unsigned int ... TDegrees >
+	struct RestrictionProlongation< UIntPack< TDegrees ... > >
+	{
+		virtual void init( void ){ }
+		virtual double upSampleCoefficient( const int pOff[] , const int cOff[] ) const = 0;
+
+		typedef DynamicWindow< double , UIntPack< ( - BSplineSupportSizes< TDegrees >::DownSample0Start + BSplineSupportSizes< TDegrees >::DownSample1End + 1 ) ... > > DownSampleStencil;
+		struct   UpSampleStencil  : public DynamicWindow< double , UIntPack< BSplineSupportSizes< TDegrees >::UpSampleSize ... > > { };
+		struct DownSampleStencils : public DynamicWindow< DownSampleStencil , IsotropicUIntPack< sizeof ... ( TDegrees ) , 2 > > { };
+
+		void init( int highDepth ){ _highDepth = highDepth ; init(); }
+		void setStencil (   UpSampleStencil & stencil  ) const;
+		void setStencils( DownSampleStencils& stencils ) const;
+		int highDepth( void ) const { return _highDepth; }
+
+	protected:
+		int _highDepth;
+	};
+
+
+	template< unsigned int ... TDegrees >
+	struct System< UIntPack< TDegrees ... > >
+	{
+		virtual void init( void ){ }
+		virtual double ccIntegrate( const int off1[] , const int off2[] ) const = 0;
+		virtual double pcIntegrate( const int off1[] , const int off2[] ) const = 0;
+		virtual bool vanishesOnConstants( void ) const { return false; }
+		virtual RestrictionProlongation< UIntPack< TDegrees ... > >& restrictionProlongation( void ) = 0;
+
+		struct CCStencil : public DynamicWindow< double , UIntPack< BSplineOverlapSizes< TDegrees , TDegrees >::OverlapSize ... > >{ };
+#ifdef SHOW_WARNINGS
+#pragma message ( "[WARNING] Why are the parent/child stencils so big?" )
+#endif // SHOW_WARNINGS
+		struct PCStencils : public DynamicWindow< CCStencil , IsotropicUIntPack< sizeof ... ( TDegrees ) , 2 > >{ };
+
+		void init( int highDepth ){ _highDepth = highDepth ; init(); }
+		template< bool IterateFirst > void setStencil ( CCStencil & stencil  ) const;
+		template< bool IterateFirst > void setStencils( PCStencils& stencils ) const;
+		int highDepth( void ) const { return _highDepth; }
+
+	protected:
+		int _highDepth;
+	};
+
+	template< unsigned int ... TDegrees , unsigned int ... CDegrees , unsigned int CDim >
+	struct Constraint< UIntPack< TDegrees ... > , UIntPack< CDegrees ... > , CDim > // Abstract CDim-valued integrator pairing test functions (TDegrees) with constraint functions (CDegrees)
+	{
+		static_assert( sizeof...(TDegrees)==sizeof...(CDegrees) , "[ERROR] BaseFEMIntegrator::Constraint: Test and constraint dimensions don't match" );
+		// Customization points supplied by derived classes (FEMIntegrator::Constraint maps cc/pc/cp to child-child, parent-child and child-parent integration)
+		virtual void init( void ){ ; } // called by init( int ) once _highDepth has been stored
+		virtual Point< double , CDim > ccIntegrate( const int off1[] , const int off2[] ) const = 0;
+		virtual Point< double , CDim > pcIntegrate( const int off1[] , const int off2[] ) const = 0;
+		virtual Point< double , CDim > cpIntegrate( const int off1[] , const int off2[] ) const = 0;
+		virtual RestrictionProlongation< UIntPack< TDegrees ... > >& tRestrictionProlongation( void ) = 0; // restriction/prolongation object for the test degrees
+		virtual RestrictionProlongation< UIntPack< CDegrees ... > >& cRestrictionProlongation( void ) = 0; // restriction/prolongation object for the constraint degrees
+
+		struct CCStencil : public DynamicWindow< Point< double , CDim > , UIntPack< BSplineOverlapSizes< TDegrees , CDegrees >::OverlapSize ... > >{ }; // window of CDim-vectors sized per dimension by the test/constraint overlap
+#ifdef SHOW_WARNINGS
+#pragma message ( "[WARNING] Why are the parent/child stencils so big?" )
+#endif // SHOW_WARNINGS
+		struct PCStencils : public DynamicWindow< CCStencil , IsotropicUIntPack< sizeof ... ( TDegrees ) , 2 > >{ }; // a window of CCStencils; presumably 2 entries per dimension -- TODO confirm against IsotropicUIntPack
+		struct CPStencils : public DynamicWindow< CCStencil , IsotropicUIntPack< sizeof ... ( TDegrees ) , 2 > >{ }; // same layout as PCStencils, kept as a distinct type so setStencils can be overloaded
+
+		void init( int highDepth ){ _highDepth = highDepth ; init(); } // store the depth first, then run the virtual init()
+		template< bool IterateFirst > void setStencil ( CCStencil & stencil  ) const; // defined outside this class body
+		template< bool IterateFirst > void setStencils( PCStencils& stencils ) const; // defined outside this class body
+		template< bool IterateFirst > void setStencils( CPStencils& stencils ) const; // defined outside this class body
+		int highDepth( void ) const { return _highDepth; }
+
+	protected:
+		int _highDepth; // depth most recently passed to init( int ); holds no defined value before that call
+	};
+
+	template< unsigned int ... TDegrees >
+	struct SystemConstraint< UIntPack< TDegrees ... > > :  public Constraint< UIntPack< TDegrees ... > , UIntPack< TDegrees ... > , 1 > // Adapter exposing a System through the Constraint interface with CDim=1
+	{
+		typedef  Constraint< UIntPack< TDegrees ... > , UIntPack< TDegrees ... > , 1 > Base;
+		SystemConstraint( System< UIntPack< TDegrees ... > >& sys ) : _sys( sys ){;} // only a reference is kept: sys must outlive this object
+		void init( void ){ _sys.init( Base::highDepth() ); } // System::init( int ) stores the depth and then calls the virtual init() itself, so a second _sys.init() would only repeat the work
+		Point< double , 1 > ccIntegrate( const int off1[] , const int off2[] ) const{ return Point< double , 1 >( _sys.ccIntegrate( off1 , off2 ) ); }
+		Point< double , 1 > pcIntegrate( const int off1[] , const int off2[] ) const{ return Point< double , 1 >( _sys.pcIntegrate( off1 , off2 ) ); }
+		Point< double , 1 > cpIntegrate( const int off1[] , const int off2[] ) const{ return Point< double , 1 >( _sys.pcIntegrate( off2 , off1 ) ); } // child-parent is answered by the system's parent-child integral with the two offsets swapped
+		RestrictionProlongation< UIntPack< TDegrees ... > >& tRestrictionProlongation( void ){ return _sys.restrictionProlongation(); } // test and constraint sides share the system's object
+		RestrictionProlongation< UIntPack< TDegrees ... > >& cRestrictionProlongation( void ){ return _sys.restrictionProlongation(); }
+	protected:
+		System< UIntPack< TDegrees ... > >& _sys; // wrapped system (not owned)
+	};
+};
+
+/////////////////////////////////////////////////
+// An implementation of the virtual integrator //
+/////////////////////////////////////////////////
+struct FEMIntegrator
+{
+protected:
+	// Base case (one signature left): the node is valid when its offset along this dimension
+	// is not reported out of bounds by BSplineEvaluationData at the given depth.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )==0 , bool >::type _IsValidFEMNode( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] )
+	{ return !BSplineEvaluationData< FEMSig >::OutOfBounds( depth , *offset ); }
+	// Recursive case: reject as soon as the leading dimension is out of bounds, otherwise
+	// check the remaining signatures against the remaining offsets.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )!=0 , bool >::type _IsValidFEMNode( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] )
+	{ if( BSplineEvaluationData< FEMSig >::OutOfBounds( depth , *offset ) ) return false ; return _IsValidFEMNode( UIntPack< FEMSigs ... >() , depth , offset+1 ); }
+	// Base case (one signature left): out of bounds exactly when BSplineEvaluationData says
+	// so for the offset along this dimension.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )==0 , bool >::type _IsOutOfBounds( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] )
+	{ return BSplineEvaluationData< FEMSig >::OutOfBounds( depth , *offset ); }
+	// Recursive case: out of bounds if the leading dimension is, or if any of the remaining
+	// dimensions is.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )!=0 , bool >::type _IsOutOfBounds( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] )
+	{ if( BSplineEvaluationData< FEMSig >::OutOfBounds( depth , *offset ) ) return true ; return _IsOutOfBounds( UIntPack< FEMSigs ... >() , depth , offset+1 ); }
+	// Base case (one signature left): write BSplineEvaluationData< FEMSig >::Begin( depth )
+	// into the slot the pointer currently addresses.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )==0 >::type _BSplineBegin( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , int begin[] )
+	{ *begin = BSplineEvaluationData< FEMSig >::Begin( depth ); }
+	// Recursive case: fill the leading slot, then let the remaining signatures fill the
+	// slots that follow it.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )!=0 >::type _BSplineBegin( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , int begin[] )
+	{ *begin = BSplineEvaluationData< FEMSig >::Begin( depth ) ; _BSplineBegin( UIntPack< FEMSigs ... >() , depth , begin+1 ); }
+	// Base case (one signature left): write BSplineEvaluationData< FEMSig >::End( depth )
+	// into the slot the pointer currently addresses.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )==0 >::type _BSplineEnd( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , int end[] )
+	{ *end = BSplineEvaluationData< FEMSig >::End( depth ); }
+	// Recursive case: fill the leading slot, then let the remaining signatures fill the
+	// slots that follow it.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )!=0 >::type _BSplineEnd( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , int end[] )
+	{ *end = BSplineEvaluationData< FEMSig >::End( depth ) ; _BSplineEnd( UIntPack< FEMSigs ... >() , depth , end+1 ); }
+	// Base case (one signature left): the one-dimensional BSplineEvaluationData integral for
+	// the leading offset and the leading [begin,end] pair (last argument fixed to 0).
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )==0 , double >::type _Integral( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] , const double begin[] , const double end[] )
+	{ return BSplineEvaluationData< FEMSig >::Integral( depth , *offset , *begin , *end , 0 ); }
+	// Recursive case: the leading one-dimensional integral multiplied by the integral over
+	// the remaining dimensions.
+	template< unsigned int FEMSig , unsigned int ... FEMSigs >
+	static typename std::enable_if< sizeof ... ( FEMSigs )!=0 , double >::type _Integral( UIntPack< FEMSig , FEMSigs ... > , unsigned int depth , const int offset[] , const double begin[] , const double end[] )
+	{ return BSplineEvaluationData< FEMSig >::Integral( depth , *offset , *begin , *end , 0 ) * _Integral( UIntPack< FEMSigs ... >() , depth , offset+1 , begin+1 , end+1 ); }
+public:
+	// Product of the per-dimension integrals computed by _Integral; a negative depth yields 0.
+	template< unsigned int ... FEMSigs >
+	static double Integral( UIntPack< FEMSigs ... > , int depth , const int offset[] , const double begin[] , const double end[] )
+	{
+		return depth<0 ? 0. : _Integral( UIntPack< FEMSigs ... >() , depth , offset , begin , end );
+	}
+	template< unsigned int ... FEMSigs > static bool IsValidFEMNode( UIntPack< FEMSigs ... > , int depth , const int offset[] ){ return depth>=0 && _IsValidFEMNode( UIntPack< FEMSigs ... >() , depth , offset ); } // a negative depth is never valid; checked here, before the helper converts depth to unsigned int
+	template< unsigned int ... FEMSigs > static bool IsOutOfBounds( UIntPack< FEMSigs ... > , int depth , const int offset[] ){ return depth<0 || _IsOutOfBounds( UIntPack< FEMSigs ... >() , depth , offset ); } // a negative depth counts as out of bounds
+	template< unsigned int ... FEMSigs > static void BSplineBegin( UIntPack< FEMSigs ... > , int depth , int begin[] ){ if( depth>=0 ) _BSplineBegin( UIntPack< FEMSigs ... >() , depth , begin ); } // begin[] is left untouched when depth is negative
+	template< unsigned int ... FEMSigs > static void BSplineEnd  ( UIntPack< FEMSigs ... > , int depth , int end  [] ){ if( depth>=0 ) _BSplineEnd  ( UIntPack< FEMSigs ... >() , depth , end   ); } // end[] is left untouched when depth is negative
+
+	template< typename TSignatures , typename TDerivatives                                                                    > struct                  System{}; // empty primary template; the UIntPack specialization below carries the implementation
+	template< typename TSignatures , typename TDerivatives , typename CSignatures , typename CDerivatives , unsigned int CDim > struct              Constraint{}; // empty primary template; specialized below for UIntPack arguments
+	template< typename TSignatures , typename TDerivatives , typename CSignatures , typename CDerivatives                     > struct        ScalarConstraint{}; // empty primary template; specialized below for UIntPack arguments
+	template< typename TSignatures                                                                                            > struct RestrictionProlongation{}; // empty primary template; specialized below for UIntPack arguments
+	template< typename TSignatures , typename TDerivatives                                                                    > struct          PointEvaluator{}; // empty primary template; specialized below for UIntPack arguments
+	template< typename TSignatures , typename TDerivatives                                                                    > struct     PointEvaluatorState{}; // empty primary template; specialized below for UIntPack arguments
+
+	template< unsigned int ... TSignatures , unsigned int ... TDs >
+	struct PointEvaluatorState< UIntPack< TSignatures ... > , UIntPack< TDs ... > > : public BaseFEMIntegrator::template PointEvaluatorState< sizeof ... ( TSignatures ) > // Per-point cache of one-dimensional values and derivatives, filled by PointEvaluator
+	{
+		static_assert( sizeof...(TSignatures)==sizeof...(TDs) , "[ERROR] Degree and derivative dimensions don't match" );
+		static_assert( UIntPack< FEMSignature< TSignatures >::Degree ... >::template Compare< UIntPack< TDs ... > >::GreaterThanOrEqual , "[ERROR] PointEvaluatorState: More derivatives than degrees" );
+		// Number of dimensions (one signature per dimension)
+		static const unsigned int Dim = sizeof...(TSignatures);
+		// value: product of the cached 1D values over all Dim dimensions; subValue: the same product over the first Dim-1 dimensions only
+		double value   ( const int offset[] , const unsigned int derivatives[] ) const { return _value< Dim   >( offset , derivatives ); }
+		double subValue( const int offset[] , const unsigned int derivatives[] ) const { return _value< Dim-1 >( offset , derivatives ); }
+		// Spelled out without "auto": values< _Dim >() returns a pointer to the rows (each of TDs[_Dim]+1 doubles) cached for dimension _Dim
+		template< unsigned int _Dim >
+		const double (*(values)( void ) const )[ UIntPack< TDs ... >::template Get< _Dim >()+1 ] { return std::template get< _Dim >( _oneDValues ).values; }
+	protected:
+		int _pointOffset[Dim]; // per-dimension offset assigned in PointEvaluator::initEvaluationState
+		// Cached values for one dimension: one row per support slot, one column per derivative order 0..D
+		template< unsigned int Degree , unsigned int D > struct _OneDValues
+		{
+			double values[ BSplineSupportSizes< Degree >::SupportSize ][ D+1 ];
+			double value( int dOff , unsigned int d ) const // dOff is the function offset minus the point offset (see _value)
+			{
+				if( dOff>=-BSplineSupportSizes< Degree >::SupportEnd && dOff<=-BSplineSupportSizes< Degree >::SupportStart && d<=D ) return values[ dOff+BSplineSupportSizes< Degree >::SupportEnd][d];
+				else return 0; // outside the cached range, or a derivative order that was not stored
+			}
+		};
+		std::tuple< _OneDValues< FEMSignature< TSignatures >::Degree , TDs > ... > _oneDValues; // one cache per dimension
+		template< unsigned int MaxDim=Dim , unsigned int I=0 > typename std::enable_if< I==MaxDim , double >::type _value( const int off[] , const unsigned int d[] ) const { return 1.; } // end of the recursion: empty product
+		template< unsigned int MaxDim=Dim , unsigned int I=0 > typename std::enable_if< I!=MaxDim , double >::type _value( const int off[] , const unsigned int d[] ) const { return std::get< I >( _oneDValues ).value( off[I]-_pointOffset[I] , d[I] ) * _value< MaxDim , I+1 >( off , d ); } // factor for dimension I times the factors for dimensions I+1..MaxDim-1
+		template< typename T1 , typename T2 > friend struct PointEvaluator; // PointEvaluator writes _pointOffset and _oneDValues
+	};
+
+	template< unsigned int ... TSignatures , unsigned int ... TDs >
+	struct PointEvaluator< UIntPack< TSignatures ... > , UIntPack< TDs ... > > : public BaseFEMIntegrator::template PointEvaluator< UIntPack< FEMSignature< TSignatures >::Degree ... > > // Fills PointEvaluatorState objects from per-dimension BSplineData tables
+	{
+		static_assert( sizeof...(TSignatures)==sizeof...(TDs) , "[ERROR] PointEvaluator: Degree and derivative dimensions don't match" );
+		static_assert( UIntPack< FEMSignature< TSignatures >::Degree ... >::template Compare< UIntPack< TDs ... > >::GreaterThanOrEqual , "[ERROR] PointEvaluator: More derivatives than degrees" );
+		// Number of dimensions (one signature per dimension)
+		static const unsigned int Dim = sizeof ... ( TSignatures );
+
+		typedef typename BaseFEMIntegrator::template PointEvaluator< UIntPack< FEMSignature< TSignatures >::Degree ... > > Base;
+		// Resets every per-dimension BSplineData table with maxDepth (see _init)
+		PointEvaluator( unsigned int maxDepth ) : _maxDepth( maxDepth ) { _init(); }
+		template< unsigned int ... EDs >
+		void initEvaluationState( Point< double , Dim > p , unsigned int depth , PointEvaluatorState< UIntPack< TSignatures ... > , UIntPack< EDs ... > >& state ) const // derives the point offset from p, then delegates to the overload below
+		{
+			unsigned int res = 1<<depth;
+			for( int d=0 ; d<Dim ; d++ ) state._pointOffset[d] = (int)( p[d] * res ); // the cast truncates toward zero
+			initEvaluationState( p , depth , state._pointOffset , state );
+		}
+		template< unsigned int ... EDs >
+		void initEvaluationState( Point< double , Dim > p , unsigned int depth , const int* offset , PointEvaluatorState< UIntPack< TSignatures ... > , UIntPack< EDs ... > >& state ) const // uses the caller-supplied offset as the point offset
+		{
+			static_assert( UIntPack< TDs ... >::template Compare< UIntPack< EDs ... > >::GreaterThanOrEqual , "[ERROR] PointEvaluator::init: More evaluation derivatives than stored derivatives" );
+			for( int d=0 ; d<Dim ; d++ ) state._pointOffset[d] = (int)offset[d];
+			_initEvaluationState( UIntPack< TSignatures ... >() , UIntPack< EDs ... >() , &p[0] , depth , state );
+		}
+	protected:
+		unsigned int _maxDepth; // depth handed to BSplineData::reset for every dimension
+		std::tuple< BSplineData< TSignatures , TDs > ... > _bSplineData; // one table per dimension
+		template< unsigned int I=0 > typename std::enable_if< I==Dim >::type _init( void ){}
+		template< unsigned int I=0 > typename std::enable_if< I< Dim >::type _init( void ){ std::get< I >( _bSplineData ).reset( _maxDepth ) ; _init< I+1 >( ); }
+		// Fills the cache of dimension I: one row per offset s in [ -SupportEnd , -SupportStart ], one column per derivative order 0..D
+		template< unsigned int I , unsigned int TSig , unsigned int D , typename State >
+		void _setEvaluationState( const double* p , unsigned int depth , State& state ) const
+		{
+			static const int       LeftSupportRadius = -BSplineSupportSizes< FEMSignature< TSig >::Degree >::SupportStart; // not referenced in this function
+			static const int  LeftPointSupportRadius =  BSplineSupportSizes< FEMSignature< TSig >::Degree >::SupportEnd  ;
+			static const int      RightSupportRadius =  BSplineSupportSizes< FEMSignature< TSig >::Degree >::SupportEnd  ; // not referenced in this function
+			static const int RightPointSupportRadius = -BSplineSupportSizes< FEMSignature< TSig >::Degree >::SupportStart;
+			for( int s=-LeftPointSupportRadius ; s<=RightPointSupportRadius ; s++ )
+			{
+				int pIdx = state._pointOffset[I];
+				int fIdx = state._pointOffset[I]+s;
+				double _p = p[I];
+				const Polynomial< FEMSignature< TSig >::Degree >* components = std::get< I >( _bSplineData )[depth].polynomialsAndOffset( _p , pIdx , fIdx ); // indexed by derivative order below
+				for( int d=0 ; d<=D ; d++ ) std::get< I >( state._oneDValues ).values[ s+LeftPointSupportRadius ][d] = components[d]( _p ); // row index shifted so that it starts at 0
+			}
+		}
+		template< typename State , unsigned int TSig , unsigned int ... TSigs , unsigned int D , unsigned int ... Ds >
+		typename std::enable_if< sizeof...(TSigs)==0 >::type _initEvaluationState( UIntPack< TSig , TSigs ... > , UIntPack< D , Ds ... > , const double* p , unsigned int depth , State& state ) const // last dimension
+		{
+			_setEvaluationState< Dim-1 , TSig , D >( p , depth , state );
+		}
+		template< typename State , unsigned int TSig , unsigned int ... TSigs , unsigned int D , unsigned int ... Ds >
+		typename std::enable_if< sizeof...(TSigs)!=0 >::type _initEvaluationState( UIntPack< TSig , TSigs ... > , UIntPack< D , Ds ... > , const double* p , unsigned int depth , State& state ) const // peel off the leading dimension and recurse
+		{
+			_setEvaluationState< Dim-1-sizeof...(TSigs) , TSig , D >( p , depth , state ); // Dim-1-sizeof...(TSigs) is the index of the dimension being peeled off
+			_initEvaluationState( UIntPack< TSigs ... >() , UIntPack< Ds ... >() , p , depth , state );
+		}
+	};
+
+	template< unsigned int ... TSignatures >
+	struct RestrictionProlongation< UIntPack< TSignatures ... > > : public BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< TSignatures >::Degree ... > > // Up-sampling coefficients built as a product of per-dimension UpSampleEvaluators
+	{
+		static const unsigned int Dim = sizeof ... ( TSignatures );
+		typedef typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< TSignatures >::Degree ... > > Base;
+		// Coefficient relating the function at offset pOff to the function at offset cOff (names suggest parent/child -- confirm against UpSampleEvaluator::value)
+		double upSampleCoefficient( const int pOff[] , const int cOff[] ) const { return _coefficient( pOff , cOff ); }
+		void init( unsigned int depth ){ Base::init( depth ); } // forwards to the base-class overload, which this class's init( void ) would otherwise hide
+		void init( void ){ _init( Base::highDepth() ); }
+
+	protected:
+		std::tuple< typename BSplineEvaluationData< TSignatures >::UpSampleEvaluator ... > _upSamplers; // one evaluator per dimension
+
+		template< unsigned int D=0 > typename std::enable_if< D==Dim >::type _init( int highDepth ){ }
+		template< unsigned int D=0 > typename std::enable_if< D< Dim >::type _init( int highDepth ){ std::get< D >( _upSamplers ).set( highDepth-1 ) ; _init< D+1 >( highDepth ); } // every evaluator is set with highDepth-1
+		template< unsigned int D=0 > typename std::enable_if< D==Dim , double >::type _coefficient( const int pOff[] , const int cOff[] ) const { return 1.; } // end of the recursion: empty product
+		template< unsigned int D=0 > typename std::enable_if< D< Dim , double >::type _coefficient( const int pOff[] , const int cOff[] ) const { return _coefficient< D+1 >( pOff , cOff ) * std::get< D >( _upSamplers ).value( pOff[D] , cOff[D] ); }
+	};
+
+	template< unsigned int ... TSignatures , unsigned int ... TDerivatives , unsigned int ... CSignatures , unsigned int ... CDerivatives , unsigned int CDim >
+	struct Constraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > , UIntPack< CSignatures ... > , UIntPack< CDerivatives ... > , CDim > : public BaseFEMIntegrator::template Constraint< UIntPack< FEMSignature< TSignatures >::Degree ... > , UIntPack< FEMSignature< CSignatures >::Degree ... > , CDim >
+	{
+		static_assert( sizeof ... ( TSignatures ) == sizeof ... ( CSignatures ) , "[ERROR] Test signatures and constraint signatures must have the same dimension" );
+		static_assert( sizeof ... ( TSignatures ) == sizeof ... ( TDerivatives ) , "[ERROR] Test signatures and derivatives must have the same dimension" );
+		static_assert( sizeof ... ( CSignatures ) == sizeof ... ( CDerivatives ) , "[ERROR] Constraint signatures and derivatives must have the same dimension" );
+		static_assert( UIntPack< FEMSignature< TSignatures >::Degree ... >::template Compare< UIntPack< TDerivatives ... > >::GreaterThanOrEqual , "[ERROR] Test functions cannot have more derivatives than the degree" );
+		static_assert( UIntPack< FEMSignature< CSignatures >::Degree ... >::template Compare< UIntPack< CDerivatives ... > >::GreaterThanOrEqual , "[ERROR] Constraint functions cannot have more derivatives than the degree" );
+
+		static const unsigned int Dim = sizeof ... ( TSignatures );
+		typedef typename BaseFEMIntegrator::template Constraint< UIntPack< FEMSignature< TSignatures >::Degree ... > , UIntPack< FEMSignature< CSignatures >::Degree ... > , CDim > Base;
+		// Helpers converting between a flat derivative index and per-dimension derivative orders (forwarded to TensorDerivatives)
+		static const unsigned int TDerivativeSize = TensorDerivatives< UIntPack< TDerivatives ... > >::Size;
+		static const unsigned int CDerivativeSize = TensorDerivatives< UIntPack< CDerivatives ... > >::Size;
+		static inline void TFactorDerivatives( unsigned int idx , unsigned int d[ Dim ] ){ TensorDerivatives< UIntPack< TDerivatives ... > >::Factor( idx , d ); }
+		static inline void CFactorDerivatives( unsigned int idx , unsigned int d[ Dim ] ){ TensorDerivatives< UIntPack< CDerivatives ... > >::Factor( idx , d ); }
+		static inline unsigned int TDerivativeIndex( const unsigned int d[ Dim ] ){ return TensorDerivatives< UIntPack< TDerivatives ... > >::Index( d ); }
+		static inline unsigned int CDerivativeIndex( const unsigned int d[ Dim ] ){ return TensorDerivatives< UIntPack< CDerivatives ... > >::Index( d ); }
+		Matrix< double , TDerivativeSize , CDerivativeSize > weights[CDim]; // per output component: weight of each (test derivative , constraint derivative) pair; set these before calling init()
+		// The three integration flavors share _integrate and differ only in the IntegrationType
+		Point< double , CDim > ccIntegrate( const int off1[] , const int off2[] ) const { return _integrate( INTEGRATE_CHILD_CHILD  , off1 , off2 ); }
+		Point< double , CDim > pcIntegrate( const int off1[] , const int off2[] ) const { return _integrate( INTEGRATE_PARENT_CHILD , off1 , off2 ); }
+		Point< double , CDim > cpIntegrate( const int off1[] , const int off2[] ) const { return _integrate( INTEGRATE_CHILD_PARENT , off1 , off2 ); }
+
+		void init( unsigned int depth ){ Base::init( depth ); } // forwards to the base-class overload, which init( void ) would otherwise hide
+		void init( void )
+		{
+			// Set up the 1D integrators, then rebuild the sparse list of derivative pairs from the current weights
+			_init( Base::highDepth() );
+			_weightedIndices.resize(0);
+			for( unsigned int d1=0 ; d1<TDerivativeSize ; d1++ ) for( unsigned int d2=0 ; d2<CDerivativeSize ; d2++ )
+			{
+				_WeightedIndices w(d1,d2);
+				for( unsigned int c=0 ; c<CDim ; c++ ) if( weights[c](d1,d2)>0 ) w.indices.push_back( std::pair< unsigned int , double >( c , weights[c](d1,d2) ) ); // NOTE(review): strictly negative weights are skipped as well as zeros -- confirm this is intended
+				if( w.indices.size() ) _weightedIndices.push_back(w);
+			}
+		}
+		typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< TSignatures >::Degree ... > >& tRestrictionProlongation( void ){ return _tRestrictionProlongation; }
+		typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< CSignatures >::Degree ... > >& cRestrictionProlongation( void ){ return _cRestrictionProlongation; }
+	protected:
+		RestrictionProlongation< UIntPack< TSignatures ... > > _tRestrictionProlongation;
+		RestrictionProlongation< UIntPack< CSignatures ... > > _cRestrictionProlongation;
+		struct _WeightedIndices // one (d1,d2) derivative pair together with the output components it contributes to
+		{
+			_WeightedIndices( unsigned int _d1=0 , unsigned int _d2=0 ) : d1(_d1) , d2(_d2) { ; }
+			unsigned int d1 , d2;
+			std::vector< std::pair< unsigned int , double > > indices; // (component index , weight)
+		};
+		std::vector< _WeightedIndices > _weightedIndices; // rebuilt by init()
+		enum IntegrationType
+		{
+			INTEGRATE_CHILD_CHILD ,
+			INTEGRATE_PARENT_CHILD ,
+			INTEGRATE_CHILD_PARENT
+		};
+
+		template< unsigned int _TSig , unsigned int _TDerivatives , unsigned int _CSig , unsigned int _CDerivatives >
+		struct _Integrators // the three 1D integrators used for a single dimension
+		{
+			typename BSplineIntegrationData< _TSig , _CSig >::FunctionIntegrator::template      Integrator< _TDerivatives , _CDerivatives > ccIntegrator;
+			typename BSplineIntegrationData< _TSig , _CSig >::FunctionIntegrator::template ChildIntegrator< _TDerivatives , _CDerivatives > pcIntegrator;
+			typename BSplineIntegrationData< _CSig , _TSig >::FunctionIntegrator::template ChildIntegrator< _CDerivatives , _TDerivatives > cpIntegrator; // roles of test and constraint are exchanged here
+		};
+		std::tuple< _Integrators< TSignatures , TDerivatives , CSignatures , CDerivatives > ... > _integrators;
+
+		template< unsigned int D=0 >
+		typename std::enable_if< D==Dim >::type _init( int depth ){ ; }
+		template< unsigned int D=0 >
+		typename std::enable_if< D< Dim >::type _init( int depth )
+		{
+			std::get< D >( _integrators ).ccIntegrator.set( depth );
+			if( depth ) std::get< D >( _integrators ).pcIntegrator.set( depth-1 ) , std::get< D >( _integrators ).cpIntegrator.set( depth-1 ); // the two cross-level integrators are only set when depth is non-zero
+			_init< D+1 >( depth );
+		}
+		template< unsigned int D=0 >
+		typename std::enable_if< D==Dim , double >::type _integral( IntegrationType iType , const int off1[] , const int off2[] , const unsigned int d1[] , const unsigned int d2[] ) const { return 1.; }
+		template< unsigned int D=0 >
+		typename std::enable_if< D< Dim , double >::type _integral( IntegrationType iType , const int off1[] , const int off2[] , const unsigned int d1[] , const unsigned int d2[] ) const
+		{
+			double remainingIntegral = _integral< D+1 >( iType , off1 , off2 , d1 , d2 );
+			switch( iType )
+			{
+			case INTEGRATE_CHILD_CHILD:  return std::get< D >( _integrators ).ccIntegrator.dot( off1[D] , off2[D] , d1[D] , d2[D] ) * remainingIntegral;
+			case INTEGRATE_PARENT_CHILD: return std::get< D >( _integrators ).pcIntegrator.dot( off1[D] , off2[D] , d1[D] , d2[D] ) * remainingIntegral;
+			case INTEGRATE_CHILD_PARENT: return std::get< D >( _integrators ).cpIntegrator.dot( off2[D] , off1[D] , d2[D] , d1[D] ) * remainingIntegral; // arguments swapped to match cpIntegrator's exchanged roles
+			default: ERROR_OUT( "Undefined integration type" );
+			}
+			return 0;
+		}
+		Point< double , CDim > _integrate( IntegrationType iType , const int off1[] , const int off2[] ) const; // defined outside this class body
+	};
+
+	template< unsigned int ... TSignatures , unsigned int ... TDerivatives , unsigned int ... CSignatures , unsigned int ... CDerivatives >
+	struct ScalarConstraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > , UIntPack< CSignatures ... > , UIntPack< CDerivatives ... > > : public Constraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > , UIntPack< CSignatures ... > , UIntPack< CDerivatives ... > , 1 > // Constraint with CDim=1 whose weights come from a list of per-order coefficients
+	{
+		static const unsigned int Dim = sizeof ... ( TSignatures );
+		typedef typename BaseFEMIntegrator::template Constraint<  UIntPack< FEMSignature< TSignatures >::Degree ... > , UIntPack< FEMSignature< CSignatures >::Degree ... > , 1 > Base;
+
+		typedef Constraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > , UIntPack< CSignatures ... > , UIntPack< CDerivatives ... > , 1 > FullConstraint;
+		using FullConstraint::weights;
+		// [NOTE] We define the constructor using a recursive function call to take into account multiplicity (e.g. so that d^2/dxdy and d^2/dydx each contribute)
+		ScalarConstraint( const std::initializer_list< double >& w ) // w[k] is added for derivative tuples reached after k increments; entries past index DMax are ignored
+		{
+			std::function< void ( unsigned int[] , const double[] , unsigned int ) > SetDerivativeWeights = [&]( unsigned int derivatives[Dim] , const double w[] , unsigned int d )
+			{
+				unsigned int idx1 = FullConstraint::TDerivativeIndex( derivatives ) , idx2 = FullConstraint::CDerivativeIndex( derivatives ); // the same derivative tuple is indexed on both the test and the constraint side
+				weights[0][idx1][idx2] += w[0];
+				if( d>0 ) for( int dd=0 ; dd<Dim ; dd++ ){ derivatives[dd]++ ; SetDerivativeWeights( derivatives , w+1 , d-1 ) ; derivatives[dd]--; } // raise each dimension in turn, recurse with the next coefficient, then restore
+			};
+			static const unsigned int DMax = std::min< unsigned int >( UIntPack< TDerivatives ... >::Min() , UIntPack< CDerivatives ... >::Min() ); // highest order available in every dimension on both sides
+
+			unsigned int derivatives[Dim];
+			double _w[DMax+1];
+			memset( _w , 0 , sizeof(_w) );
+			{
+				unsigned int dd=0;
+				for( typename std::initializer_list< double >::const_iterator iter=w.begin() ; iter!=w.end() && dd<=DMax ; dd++ , iter++ ) _w[dd] = *iter; // copy at most DMax+1 coefficients
+			}
+			for( int d=0 ; d<Dim ; d++ ) derivatives[d] = 0;
+			if( w.size() ) SetDerivativeWeights( derivatives , _w , std::min< unsigned int >( DMax+1 , (unsigned int)w.size() )-1 ); // recursion depth = number of coefficients used, minus one
+		}
+	};
+
+	template< unsigned int ... TSignatures , unsigned int ... TDerivatives >
+	struct System< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > > : public BaseFEMIntegrator::template System< UIntPack< FEMSignature< TSignatures >::Degree... > > // System implemented by a ScalarConstraint that uses the same signatures on both sides
+	{
+		static_assert( sizeof ... ( TSignatures ) == sizeof ... ( TDerivatives ) , "[ERROR] Test signatures and derivatives must have the same dimension" );
+
+		static const unsigned int Dim = sizeof ... ( TSignatures );
+		typedef typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< TSignatures >::Degree... > > Base;
+		// w is forwarded unchanged to the ScalarConstraint constructor
+		System( const std::initializer_list< double >& w ) : _sc( w ){ ; }
+		void init( unsigned int depth ){ Base::init( depth ); } // forwards to the base-class overload, which init( void ) would otherwise hide
+		void init( void ){ ( (BaseFEMIntegrator::template Constraint< UIntPack< FEMSignature< TSignatures >::Degree ... > , UIntPack< FEMSignature< TSignatures >::Degree ... > , 1 >&)_sc ).init( BaseFEMIntegrator::template System< UIntPack< FEMSignature< TSignatures >::Degree... > >::_highDepth ); } // the cast to the base Constraint selects its init( int ) overload, passing this system's stored depth
+		double ccIntegrate( const int off1[] , const int off2[] ) const { return _sc.ccIntegrate( off1 , off2 )[0]; } // the constraint is one-dimensional, so component 0 is the value
+		double pcIntegrate( const int off1[] , const int off2[] ) const { return _sc.pcIntegrate( off1 , off2 )[0]; }
+		bool vanishesOnConstants( void ) const { return _sc.weights[0][0][0]==0; } // true when the weight at derivative index (0,0) is exactly zero
+
+		typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< FEMSignature< TSignatures >::Degree ... > >& restrictionProlongation( void ){ return _sc.tRestrictionProlongation(); }
+
+	protected:
+		ScalarConstraint< UIntPack< TSignatures ... > , UIntPack< TDerivatives ... >  , UIntPack< TSignatures ... > , UIntPack< TDerivatives ... > > _sc; // does all of the integration work
+	};
+};
+
+//////////////////////////////////////////
+
+template< unsigned int Dim > inline void SetGhostFlag(       RegularTreeNode< Dim , FEMTreeNodeData >* node , bool flag ){ if( !node || !node->parent ) return ; node->parent->nodeData.setGhostFlag( flag ); } // the flag is stored on the parent; no-op for a null node or a node without parent
+template< unsigned int Dim > inline bool GetGhostFlag( const RegularTreeNode< Dim , FEMTreeNodeData >* node ){ if( !node || !node->parent ) return true ; return node->parent->nodeData.getGhostFlag(); } // a null node or a node without parent counts as ghost; otherwise the parent's flag decides
+template< unsigned int Dim > inline bool IsActiveNode( const RegularTreeNode< Dim , FEMTreeNodeData >* node ){ const bool ghost = GetGhostFlag< Dim >( node ) ; return !ghost; } // active means "not ghost"
+
+template< unsigned int Dim , class Real , class Vertex > struct IsoSurfaceExtractor;
+
+template< unsigned int Dim , class Data >
+struct NodeSample // Pairs a tree node with an arbitrary payload
+{
+	RegularTreeNode< Dim , FEMTreeNodeData >* node; // raw pointer to the associated node; nothing in this struct manages its lifetime
+	Data data; // payload stored by value
+};
+template< unsigned int Dim , class Real >
+struct NodeAndPointSample // Pairs a tree node with a point sample stored as ProjectiveData
+{
+	RegularTreeNode< Dim , FEMTreeNodeData >* node; // raw pointer to the associated node; nothing in this struct manages its lifetime
+	ProjectiveData< Point< Real , Dim > , Real > sample; // Dim-dimensional point wrapped in ProjectiveData with a Real weight type
+};
+template< unsigned int Dim , class Real > using NodeSimplices = NodeSample< Dim , std::vector< Simplex< Real , Dim , Dim-1 > > >;
+
+
+template< typename T > struct WindowLoopData{ }; // empty primary template; only the UIntPack specialization below is usable
+
+template< unsigned int ... Sizes >
+struct WindowLoopData< UIntPack< Sizes ... > > // For each of the 2^Dim values of c, the flattened window indices visited within caller-supplied bounds
+{
+	static const int Dim = sizeof ... ( Sizes );
+	unsigned int size[1<<Dim]; // size[c]: number of entries of indices[c] that were filled
+	unsigned int indices[1<<Dim][ WindowSize< UIntPack< Sizes ... > >::Size ]; // indices[c][0..size[c]-1]: window indices in visiting order
+	WindowLoopData( std::function< void ( int c , int* , int* ) > boundsFunction ) // boundsFunction( c , start , end ) must write Dim entries into each of start and end
+	{
+		int start[Dim] , end[Dim];
+		for( int c=0 ; c<(1<<Dim) ; c++ )
+		{
+			size[c] = 0;
+			boundsFunction( c , start , end );
+			unsigned int idx[Dim]; // current multi-index, updated one coordinate at a time by the first callback
+			WindowLoop< Dim >::Run
+			(
+				start , end ,
+				[&]( int d , int i ){ idx[d] = i; } ,
+				[&]( void ){ indices[c][ size[c]++ ] = GetWindowIndex( UIntPack< Sizes ... >() , idx ); } // append the flattened index of the current multi-index
+			);
+		}
+	}
+};
+
+template< class Data >
+void AddAtomic( Data& a , const Data& b ) // Generic fallback: a += b inside an (unnamed) OpenMP critical section
+{
+#pragma omp critical
+	a += b;
+}
+inline void AddAtomic( float& a , const float& b ) // inline: a non-template definition here would otherwise break the ODR if this file is included from several translation units
+{
+#pragma omp atomic
+	a += b;
+}
+inline void AddAtomic( double& a , const double& b ) // inline for the same reason as the float overload
+{
+#pragma omp atomic
+	a += b;
+}
+template< class Real , unsigned int Dim >
+void AddAtomic( Point< Real , Dim >& a , const Point< Real , Dim >& b ) // Kept after the scalar overloads: for float/double no ADL applies, so only overloads declared above are candidates for the per-coordinate call
+{
+	for( int d=0 ; d<Dim ; d++ ) AddAtomic( a[d] , b[d] ); // each coordinate is updated independently; the point as a whole is not updated atomically
+}
+template< class Data >
+bool IsZero( const Data& data ){ return false; } // Generic fallback: a type with no dedicated overload is never reported as zero
+template< class Real , unsigned int Dim >
+bool IsZero( const Point< Real , Dim >& d ) // true exactly when every coordinate compares equal to 0
+{
+	bool zero = true;
+	for( int i=0 ; i<Dim ; i++ ) zero &= (d[i]==0);
+	return zero;
+}
+inline bool IsZero( const float& f ){ return f==0.f; } // inline: a non-template definition here would otherwise break the ODR if this file is included from several translation units
+inline bool IsZero( const double& f ){ return f==0.; } // inline for the same reason as the float overload
+
+template< unsigned int Dim , class Real >
+class FEMTree
+{
+public:
+	typedef RegularTreeNode< Dim , FEMTreeNodeData > FEMTreeNode;
+	Allocator< FEMTreeNode >* nodeAllocator;
+protected:
+	template< unsigned int _Dim , class _Real , class Vertex > friend struct IsoSurfaceExtractor;
+	std::atomic< int > _nodeCount;
+	void _nodeInitializer( FEMTreeNode& node ){ const int index = _nodeCount++ ; node.nodeData.nodeIndex = index; } // hands out the current counter value as the node's index and advances the atomic counter
+
+	struct _NodeInitializer // Callable that forwards node initialization to the tree it was built from
+	{
+		FEMTree& femTree; // tree whose _nodeInitializer is invoked (held by reference)
+		_NodeInitializer( FEMTree& tree ) : femTree( tree ) { }
+		void operator() ( FEMTreeNode& node ){ FEMTree& owner = femTree ; owner._nodeInitializer( node ); }
+	};
+public:
+	typedef int LocalDepth;
+	typedef int LocalOffset[Dim];
+
+	int nodeCount( void ) const { return _nodeCount.load(); } // current value of the atomic node counter
+
+	typedef NodeAndPointSample< Dim , Real > PointSample;
+
+	typedef typename FEMTreeNode::template      NeighborKey< IsotropicUIntPack< Dim , 1 > , IsotropicUIntPack< Dim , 1 > >      OneRingNeighborKey;
+	typedef typename FEMTreeNode::template ConstNeighborKey< IsotropicUIntPack< Dim , 1 > , IsotropicUIntPack< Dim , 1 > > ConstOneRingNeighborKey;
+	typedef typename FEMTreeNode::template      Neighbors< IsotropicUIntPack< Dim , 3 > >      OneRingNeighbors;
+	typedef typename FEMTreeNode::template ConstNeighbors< IsotropicUIntPack< Dim , 3 > > ConstOneRingNeighbors;
+
+	template< typename FEMDegreePack >                        using BaseSystem          = typename BaseFEMIntegrator::template System< FEMDegreePack >;
+	template< typename FEMSigPack , typename DerivativePack > using PointEvaluator      = typename     FEMIntegrator::template PointEvaluator< FEMSigPack , DerivativePack >;
+	template< typename FEMSigPack , typename DerivativePack > using PointEvaluatorState = typename     FEMIntegrator::template PointEvaluatorState< FEMSigPack , DerivativePack >;	
+	template< typename FEMDegreePack > using CCStencil  = typename BaseSystem< FEMDegreePack >::CCStencil;
+	template< typename FEMDegreePack > using PCStencils = typename BaseSystem< FEMDegreePack >::PCStencils;
+
+	template< unsigned int ... FEMSigs > bool isValidFEMNode( UIntPack< FEMSigs ... > , const FEMTreeNode* node ) const;
+	bool isValidSpaceNode( const FEMTreeNode* node ) const;
+	const FEMTreeNode* leaf( Point< Real , Dim > p ) const;
+	FEMTreeNode* leaf( Point< Real , Dim > p , LocalDepth maxDepth=-1 );
+
+	// [NOTE] In the case that T != double, we require both operators() for computing the system dual
+	template< typename T , unsigned int PointD >
+	struct InterpolationInfo // Abstract access to per-point interpolation data, with values of type T
+	{
+		virtual void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const = 0; // writes the point-index range [begin,end) associated with node
+		virtual Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const = 0; // per-point values (the "constraint dual" in ApproximatePointInterpolationInfo)
+		virtual Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const = 0; // maps dValues at a point (the "system dual" in ApproximatePointInterpolationInfo)
+		virtual Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const = 0; // the same mapping for double-valued input
+		virtual const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const = 0; // read access to the stored point record
+		virtual bool constrainsDCTerm( void ) const = 0;
+		virtual ~InterpolationInfo( void ){}
+
+		DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIndex ){ return const_cast< DualPointInfo< Dim , Real , T , PointD >& >( ( ( const InterpolationInfo* )this )->operator[]( pointIndex ) ); } // mutable access implemented by casting away const on the result of the const virtual overload
+	};
+	template< unsigned int PointD >
+	struct InterpolationInfo< double , PointD > // Specialization for T=double: the T-valued and double-valued two-argument operator() have identical signatures, so only one is declared
+	{
+		virtual void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const = 0; // writes the point-index range [begin,end) associated with node
+		virtual Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const = 0; // per-point values
+		virtual Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const = 0; // maps dValues at a point
+		virtual const DualPointInfo< Dim , Real , double , PointD >& operator[]( size_t pointIdx ) const = 0; // read access to the stored point record
+		virtual bool constrainsDCTerm( void ) const = 0;
+		virtual ~InterpolationInfo( void ){}
+
+		DualPointInfo< Dim , Real , double , PointD >& operator[]( size_t pointIndex ){ return const_cast< DualPointInfo< Dim , Real , double , PointD >& >( ( ( const InterpolationInfo* )this )->operator[]( pointIndex ) ); } // mutable access implemented by casting away const on the result of the const virtual overload
+	};
+
+	// Interpolation constraints stored sparsely, at most one DualPointInfo per tree node (primary template, T != double)
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximatePointInterpolationInfo : public InterpolationInfo< T , PointD >
+	{
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int slot = _iData.index( node );	// a negative slot is treated as "no point stored for this node"
+			begin = ( slot<0 ? 0 : slot ) , end = ( slot<0 ? 0 : slot+1 );	// [slot,slot+1) or the empty range [0,0)
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ]; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& info = _iData[ (int)pointIdx ] ; return _constraintDual( info.position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+
+		ApproximatePointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		SparseNodeData< DualPointInfo< Dim , Real , T , PointD > , ZeroUIntPack< Dim > > _iData;	// per-node point records; filled in by FEMTree (friend)
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	// Specialization for T == double of the sparse, at-most-one-point-per-node interpolation info
+	template< unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximatePointInterpolationInfo< double , PointD , ConstraintDual , SystemDual > : public InterpolationInfo< double , PointD >
+	{
+		typedef double T;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int slot = _iData.index( node );	// a negative slot is treated as "no point stored for this node"
+			begin = ( slot<0 ? 0 : slot ) , end = ( slot<0 ? 0 : slot+1 );	// [slot,slot+1) or the empty range [0,0)
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ]; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& info = _iData[ (int)pointIdx ] ; return _constraintDual( info.position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+
+		ApproximatePointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		SparseNodeData< DualPointInfo< Dim , Real , T , PointD > , ZeroUIntPack< Dim > > _iData;	// per-node point records; filled in by FEMTree (friend)
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	// Like the point-only variant, but each stored record also carries a Data value that is handed to both duals (primary template, T != double)
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximatePointAndDataInterpolationInfo : public InterpolationInfo< T , PointD >
+	{
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int slot = _iData.index( node );	// a negative slot is treated as "no point stored for this node"
+			begin = ( slot<0 ? 0 : slot ) , end = ( slot<0 ? 0 : slot+1 );	// [slot,slot+1) or the empty range [0,0)
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ].pointInfo; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _constraintDual( entry.pointInfo.position , entry.data ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _systemDual( entry.pointInfo.position , entry.data , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _systemDual( entry.pointInfo.position , entry.data , dValues ); }
+
+		ApproximatePointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		SparseNodeData< DualPointAndDataInfo< Dim , Real , Data , T , PointD > , ZeroUIntPack< Dim > > _iData;	// per-node ( pointInfo , data ) records; filled in by FEMTree (friend)
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: ( position , data ) -> constraint value
+		SystemDual _systemDual;	// functor: ( position , data , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	// Specialization for T == double of the sparse point-and-data interpolation info
+	template< typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximatePointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual > : public InterpolationInfo< double , PointD >
+	{
+		typedef double T;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int slot = _iData.index( node );	// a negative slot is treated as "no point stored for this node"
+			begin = ( slot<0 ? 0 : slot ) , end = ( slot<0 ? 0 : slot+1 );	// [slot,slot+1) or the empty range [0,0)
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ].pointInfo; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _constraintDual( entry.pointInfo.position , entry.data ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _systemDual( entry.pointInfo.position , entry.data , dValues ); }
+
+		ApproximatePointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		SparseNodeData< DualPointAndDataInfo< Dim , Real , Data , T , PointD > , ZeroUIntPack< Dim > > _iData;	// per-node ( pointInfo , data ) records; filled in by FEMTree (friend)
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: ( position , data ) -> constraint value
+		SystemDual _systemDual;	// functor: ( position , data , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximateChildPointInterpolationInfo : public InterpolationInfo< T , PointD >	// Per-node broods of points; a point index packs ( brood slot << Dim ) | position-within-brood (primary template, T != double)
+	{
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int idx = _iData.index( node );
+			if( idx<0 ) begin = end = 0;	// no brood stored for this node: empty range
+			else begin = (idx<<Dim) , end = begin + _iData[idx].size();	// '+' rather than '|': with '|' a full brood ( size()==1<<Dim ) in an odd slot gave end==begin, i.e. an empty range
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return __iData(pointIdx); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return _constraintDual( __iData(pointIdx).position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).position , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).position , dValues ); }
+
+		ApproximateChildPointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		static const unsigned int _Mask = (1<<Dim)-1;	// selects the low Dim bits of a point index (position within the brood)
+		SparseNodeData< DualPointInfoBrood< Dim , Real , T , PointD > , ZeroUIntPack< Dim > > _iData;	// one brood per node; filled in by FEMTree (friend)
+		DualPointInfo< Dim , Real , T , PointD >& __iData( size_t pointIdx ){ return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }	// unpack: high bits -> brood slot , low Dim bits -> entry
+		const DualPointInfo< Dim , Real , T , PointD >& __iData( size_t pointIdx ) const { return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximateChildPointInterpolationInfo< double , PointD , ConstraintDual , SystemDual > : public InterpolationInfo< double , PointD >	// T == double specialization; a point index packs ( brood slot << Dim ) | position-within-brood
+	{
+		typedef double T;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int idx = _iData.index( node );
+			if( idx<0 ) begin = end = 0;	// no brood stored for this node: empty range
+			else begin = (idx<<Dim) , end = begin + _iData[idx].size();	// '+' rather than '|': with '|' a full brood ( size()==1<<Dim ) in an odd slot gave end==begin, i.e. an empty range
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return __iData(pointIdx); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return _constraintDual( __iData(pointIdx).position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).position , dValues ); }
+
+		ApproximateChildPointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		static const unsigned int _Mask = (1<<Dim)-1;	// selects the low Dim bits of a point index (position within the brood)
+		SparseNodeData< DualPointInfoBrood< Dim , Real , T , PointD > , ZeroUIntPack< Dim > > _iData;	// one brood per node; filled in by FEMTree (friend)
+		DualPointInfo< Dim , Real , T , PointD >& __iData( size_t pointIdx ){ return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }	// unpack: high bits -> brood slot , low Dim bits -> entry
+		const DualPointInfo< Dim , Real , T , PointD >& __iData( size_t pointIdx ) const { return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximateChildPointAndDataInterpolationInfo : public InterpolationInfo< T , PointD >	// Per-node broods of ( pointInfo , data ) records; a point index packs ( brood slot << Dim ) | position-within-brood (primary template, T != double)
+	{
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int idx = _iData.index( node );
+			if( idx<0 ) begin = end = 0;	// no brood stored for this node: empty range
+			else begin = (idx<<Dim) , end = begin + _iData[idx].size();	// '+' rather than '|': with '|' a full brood ( size()==1<<Dim ) in an odd slot gave end==begin, i.e. an empty range
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return __iData(pointIdx).pointInfo; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return _constraintDual( __iData(pointIdx).pointInfo.position , __iData(pointIdx).data ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).pointInfo.position , __iData(pointIdx).data , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).pointInfo.position , __iData(pointIdx).data , dValues ); }
+
+		ApproximateChildPointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		static const unsigned int _Mask = (1<<Dim)-1;	// selects the low Dim bits of a point index (position within the brood)
+		SparseNodeData< DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD > , ZeroUIntPack< Dim > > _iData;	// one brood per node; filled in by FEMTree (friend)
+		DualPointAndDataInfo< Dim , Real , Data , T , PointD >& __iData( size_t pointIdx ){ return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }	// unpack: high bits -> brood slot , low Dim bits -> entry
+		const DualPointAndDataInfo< Dim , Real , Data , T , PointD >& __iData( size_t pointIdx ) const { return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: ( position , data ) -> constraint value
+		SystemDual _systemDual;	// functor: ( position , data , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ApproximateChildPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual > : public InterpolationInfo< double , PointD >	// T == double specialization; a point index packs ( brood slot << Dim ) | position-within-brood
+	{
+		typedef double T;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const
+		{
+			const int idx = _iData.index( node );
+			if( idx<0 ) begin = end = 0;	// no brood stored for this node: empty range
+			else begin = (idx<<Dim) , end = begin + _iData[idx].size();	// '+' rather than '|': with '|' a full brood ( size()==1<<Dim ) in an odd slot gave end==begin, i.e. an empty range
+		}
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return __iData(pointIdx).pointInfo; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return _constraintDual( __iData(pointIdx).pointInfo.position , __iData(pointIdx).data ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return _systemDual( __iData(pointIdx).pointInfo.position , __iData(pointIdx).data , dValues ); }
+
+		ApproximateChildPointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		static const unsigned int _Mask = (1<<Dim)-1;	// selects the low Dim bits of a point index (position within the brood)
+		SparseNodeData< DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD > , ZeroUIntPack< Dim > > _iData;	// one brood per node; filled in by FEMTree (friend)
+		DualPointAndDataInfo< Dim , Real , Data , T , PointD >& __iData( size_t pointIdx ){ return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }	// unpack: high bits -> brood slot , low Dim bits -> entry
+		const DualPointAndDataInfo< Dim , Real , Data , T , PointD >& __iData( size_t pointIdx ) const { return _iData[ (int)(pointIdx>>Dim) ][ pointIdx & _Mask ]; }
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: ( position , data ) -> constraint value
+		SystemDual _systemDual;	// functor: ( position , data , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ExactPointInterpolationInfo : public InterpolationInfo< T , PointD >	// Points kept in a flat array; each node owns the index span recorded in _sampleSpan (primary template, T != double)
+	{
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const { const std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ] ; begin = span.first ; end = span.second; }
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ]; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& info = _iData[ (int)pointIdx ] ; return _constraintDual( info.position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+
+		ExactPointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		void _init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , bool noRescale );	// declaration only; invoked by InitializeExactPointInterpolationInfo
+
+		std::vector< std::pair< int , int > > _sampleSpan;	// indexed by nodeIndex: ( first , second ) = [begin,end) into _iData
+		std::vector< DualPointInfo< Dim , Real , T , PointD > > _iData;	// flat array of point records
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ExactPointInterpolationInfo< double , PointD , ConstraintDual , SystemDual > : public InterpolationInfo< double , PointD >	// T == double specialization of the flat-array point storage
+	{
+		typedef double T;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const { const std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ] ; begin = span.first ; end = span.second; }
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ]; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { const auto& info = _iData[ (int)pointIdx ] ; return _constraintDual( info.position ); }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& info = _iData[ (int)pointIdx ] ; return _systemDual( info.position , dValues ); }
+
+		ExactPointInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		void _init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , bool noRescale );	// declaration only; invoked by InitializeExactPointInterpolationInfo
+
+		std::vector< std::pair< int , int > > _sampleSpan;	// indexed by nodeIndex: ( first , second ) = [begin,end) into _iData
+		std::vector< DualPointInfo< Dim , Real , T , PointD > > _iData;	// flat array of point records
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: position -> constraint value
+		SystemDual _systemDual;	// functor: ( position , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct _ExactPointAndDataInterpolationInfo : public InterpolationInfo< T , PointD >	// Shared state for the exact point-and-data variants; implements none of the virtuals itself
+	{
+		_ExactPointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _constrainsDCTerm( constrainsDCTerm ) , _constraintDual( constraintDual ) , _systemDual( systemDual ) { }	// initializers listed in member declaration order
+	protected:
+		void _init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , bool noRescale );	// declaration only; invoked by InitializeExactPointAndDataInterpolationInfo
+
+		std::vector< std::pair< int , int > > _sampleSpan;	// indexed by nodeIndex in the derived range(): ( first , second ) = [begin,end) into _iData
+		std::vector< DualPointAndDataInfo< Dim , Real , Data , T , PointD > > _iData;	// flat array of ( pointInfo , data ) records
+		bool _constrainsDCTerm;
+		ConstraintDual _constraintDual;	// functor: ( position , data ) -> constraint value
+		SystemDual _systemDual;	// functor: ( position , data , dValues ) -> dual value
+
+		friend class FEMTree< Dim , Real >;
+	};
+
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ExactPointAndDataInterpolationInfo : public _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >	// Implements the InterpolationInfo virtuals on top of the shared base state (primary template, T != double)
+	{
+		using _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_sampleSpan;	// members of a dependent base must be named explicitly to be found
+		using _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_constrainsDCTerm;
+		using _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_iData;
+		using _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_constraintDual;
+		using _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_systemDual;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const { const std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ] ; begin = span.first ; end = span.second; }
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , T , PointD >& operator[]( size_t pointIdx ) const { return _iData[ static_cast< int >( pointIdx ) ].pointInfo; }
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return _constraintDual( _iData[ pointIdx ].pointInfo.position , _iData[ (int)pointIdx ].data ); }	// constraint dual of ( position , data )
+		Point< T , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< T , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _systemDual( entry.pointInfo.position , entry.data , dValues ); }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { const auto& entry = _iData[ (int)pointIdx ] ; return _systemDual( entry.pointInfo.position , entry.data , dValues ); }
+
+		ExactPointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm ) { }	// all state lives in the base
+	};
+	template< typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	struct ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual > : public _ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual >	// T == double specialization: implements the InterpolationInfo< double , PointD > virtuals on top of the shared base state
+	{
+		using _ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual >::_sampleSpan;	// members of a dependent base must be named explicitly to be found
+		using _ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual >::_constrainsDCTerm;
+		using _ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual >::_iData;
+		void range( const FEMTreeNode* node , size_t& begin , size_t& end ) const { begin = _sampleSpan[ node->nodeData.nodeIndex ].first , end = _sampleSpan[ node->nodeData.nodeIndex ].second; }	// [begin,end) = span recorded for the node's nodeIndex
+		bool constrainsDCTerm( void ) const { return _constrainsDCTerm; }
+		const DualPointInfo< Dim , Real , double , PointD >& operator[]( size_t pointIdx ) const { return _iData[ (int)pointIdx ].pointInfo; }
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx ) const { return this->_constraintDual( _iData[ (int)pointIdx ].pointInfo.position , _iData[ (int)pointIdx ].data ); }	// this-> is required: _constraintDual is declared in the dependent base and has no using-declaration here
+		Point< double , CumulativeDerivatives< Dim , PointD >::Size > operator() ( size_t pointIdx , const Point< double , CumulativeDerivatives< Dim , PointD >::Size >& dValues ) const { return this->_systemDual( _iData[ (int)pointIdx ].pointInfo.position , _iData[ (int)pointIdx ].data , dValues ); }	// this-> is required for the same reason
+		ExactPointAndDataInterpolationInfo( ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm ) : _ExactPointAndDataInterpolationInfo< double , Data , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm ) { }	// all state lives in the base
+	};
+
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ApproximatePointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >* InitializeApproximatePointInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , int adaptiveExponent )
+	{
+		auto* info = new ApproximatePointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_iData = tree._densifyInterpolationInfoAndSetDualConstraints< T , PointD >( samples , constraintDual , adaptiveExponent );	// the tree builds the per-node point data
+		return info;
+	}
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ApproximatePointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >* InitializeApproximatePointAndDataInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , int adaptiveExponent )
+	{
+		auto* info = new ApproximatePointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_iData = tree._densifyInterpolationInfoAndSetDualConstraints< T , Data , PointD >( samples , sampleData , constraintDual , adaptiveExponent );	// the tree builds the per-node point-and-data records
+		return info;
+	}
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ApproximateChildPointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >* InitializeApproximateChildPointInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , bool noRescale  )
+	{
+		auto* info = new ApproximateChildPointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_iData = tree._densifyChildInterpolationInfoAndSetDualConstraints< T , PointD >( samples , constraintDual , noRescale );	// the tree builds the per-node broods
+		return info;
+	}
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ApproximateChildPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >* InitializeApproximateChildPointAndDataInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , bool noRescale  )
+	{
+		auto* info = new ApproximateChildPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_iData = tree._densifyChildInterpolationInfoAndSetDualConstraints< T , Data , PointD >( samples , sampleData , constraintDual , noRescale );	// the tree builds the per-node broods
+		return info;
+	}
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ExactPointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >* InitializeExactPointInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , bool noRescale )
+	{
+		auto* info = new ExactPointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_init( tree , samples , noRescale );	// populate from the tree and the samples
+		return info;
+	}
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+	static ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >* InitializeExactPointAndDataInterpolationInfo( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , SystemDual systemDual , bool constrainsDCTerm , bool noRescale )
+	{
+		auto* info = new ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >( constraintDual , systemDual , constrainsDCTerm );	// heap-allocated; the raw pointer is handed back to the caller
+		info->_init( tree , samples , sampleData , noRescale );	// populate from the tree, the samples and their data
+		return info;
+	}
+
+	template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual > friend struct ExactPointInterpolationInfo; // Grants every ExactPointInterpolationInfo instantiation access to this class's non-public members
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual > friend struct ExactPointAndDataInterpolationInfo; // Same grant for every ExactPointAndDataInterpolationInfo instantiation
+
+	template< typename T , unsigned int PointD , unsigned int ... PointDs >
+	static bool ConstrainsDCTerm( const InterpolationInfo< T , PointD >* iInfo , const InterpolationInfo< T , PointDs >* ... iInfos ){ if( ConstrainsDCTerm( iInfo ) ) return true ; return ConstrainsDCTerm( iInfos... ); }	// true as soon as any argument constrains the DC term
+	template< typename T , unsigned int PointD >
+	static bool ConstrainsDCTerm( const InterpolationInfo< T , PointD >* iInfo ){ return iInfo ? iInfo->constrainsDCTerm() : false; }	// a null pointer constrains nothing
+	static bool ConstrainsDCTerm( void ){ return false; }	// no arguments: nothing is constrained
+
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] This should not be isotropic" )
+#endif // SHOW_WARNINGS
+	template< unsigned int DensityDegree > struct DensityEstimator : public SparseNodeData< Real , IsotropicUIntPack< Dim , FEMDegreeAndBType< DensityDegree >::Signature > >	// Sparse per-node Real values tagged with a kernel depth and a co-dimension
+	{
+		DensityEstimator( int kernelDepth , int coDimension ) : _kernelDepth( kernelDepth ) , _coDimension( coDimension ){ }
+		int kernelDepth( void ) const { return _kernelDepth; }	// value given at construction
+		int coDimension( void ) const { return _coDimension; }	// value given at construction
+	protected:
+		int _kernelDepth , _coDimension;
+	};
+
+protected:
+	bool _isValidSpaceNode( const FEMTreeNode* node ) const { if( GetGhostFlag< Dim >( node ) ) return false ; return ( node->nodeData.flags & FEMTreeNodeData::SPACE_FLAG     )!=0; }	// ghost test comes first; flags are only read for non-ghost nodes
+	bool _isValidFEM1Node ( const FEMTreeNode* node ) const { if( GetGhostFlag< Dim >( node ) ) return false ; return ( node->nodeData.flags & FEMTreeNodeData::FEM_FLAG_1     )!=0; }	// non-ghost and FEM_FLAG_1 set
+	bool _isValidFEM2Node ( const FEMTreeNode* node ) const { if( GetGhostFlag< Dim >( node ) ) return false ; return ( node->nodeData.flags & FEMTreeNodeData::FEM_FLAG_2     )!=0; }	// non-ghost and FEM_FLAG_2 set
+	bool _isRefinableNode ( const FEMTreeNode* node ) const { if( GetGhostFlag< Dim >( node ) ) return false ; return ( node->nodeData.flags & FEMTreeNodeData::REFINABLE_FLAG )!=0; }	// non-ghost and REFINABLE_FLAG set
+
+	FEMTreeNode* _tree; // NOTE(review): presumably the root of the whole node hierarchy -- confirm
+	FEMTreeNode* _spaceRoot; // NOTE(review): presumably the node whose subtree spans the working domain -- confirm
+	SortedTreeNodes< Dim > _sNodes; // Node ordering queried by the _sNodes* helpers (begin/end/size per depth and per slice)
+	LocalDepth _maxDepth; // NOTE(review): presumably the deepest local depth of the tree -- confirm
+	int _depthOffset; // Gap between global and local depth: global = local + _depthOffset (see _localToGlobal)
+	mutable unsigned int _femSigs1[ Dim ]; // One unsigned value per dimension; mutable, so const members may write it
+	mutable unsigned int _femSigs2[ Dim ]; // One unsigned value per dimension; mutable, so const members may write it
+	mutable unsigned int _refinableSigs[ Dim ]; // One unsigned value per dimension; mutable, so const members may write it
+
+	static bool _InBounds( Point< Real , Dim > p );	// declaration only
+	int _localToGlobal( LocalDepth d ) const { return _depthOffset + d; }	// local depth -> global depth
+	LocalDepth _localDepth( const FEMTreeNode* node ) const { const LocalDepth local = node->depth() - _depthOffset ; return local; }	// global node depth -> local depth
+	int _localInset( LocalDepth d ) const { if( _depthOffset<=1 ) return 0 ; return 1<<( _localToGlobal( d ) - 1 ); }	// no inset unless the depth offset exceeds 1
+	void _localDepthAndOffset( const FEMTreeNode* node , LocalDepth& d , LocalOffset& off ) const	// Writes the node's local depth to d and its inset-corrected offset to off
+	{
+		node->depthAndOffset( d , off );	// global depth and offset first
+		d -= _depthOffset;	// global depth -> local depth
+		for( int c=0 , inset=_localInset( d ) ; c<Dim ; c++ ) off[c] -= inset;	// shift every coordinate by the inset at this local depth
+	}
+	template< unsigned int FEMSig > static int _BSplineBegin( LocalDepth depth ){ typedef BSplineEvaluationData< FEMSig > Evaluation ; return Evaluation::Begin( depth ); }	// forwards to BSplineEvaluationData< FEMSig >::Begin
+	template< unsigned int FEMSig > static int _BSplineEnd  ( LocalDepth depth ){ typedef BSplineEvaluationData< FEMSig > Evaluation ; return Evaluation::End  ( depth ); }	// forwards to BSplineEvaluationData< FEMSig >::End
+	// Reports whether a node is out of bounds for the given signatures.
+	// A null node is always out of bounds; otherwise the node's local depth and offset are
+	// handed to FEMIntegrator::IsOutOfBounds.
+	template< unsigned int ... FEMSigs >
+	bool _outOfBounds( UIntPack< FEMSigs ... > , const FEMTreeNode* node ) const
+	{
+		if( node )
+		{
+			LocalDepth depth;
+			LocalOffset offset;
+			_localDepthAndOffset( node , depth , offset );
+			return FEMIntegrator::IsOutOfBounds( UIntPack< FEMSigs ... >() , depth , offset );
+		}
+		return true;
+	}
+	// Begin index / end index / count of the sorted nodes at local depth d
+	// (the local depth is converted to a global depth before querying _sNodes).
+	int _sNodesBegin( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.begin( globalDepth );
+	}
+	int _sNodesEnd  ( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.end( globalDepth );
+	}
+	int _sNodesSize ( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.size( globalDepth );
+	}
+	// First slice index at local depth d: the inset for that depth.
+	int _sNodesBeginSlice( LocalDepth d ) const
+	{
+		return _localInset( d );
+	}
+	// Last slice index at local depth d: 2^(global depth) minus the inset, minus one.
+	int _sNodesEndSlice( LocalDepth d ) const
+	{
+		const int resolution = 1<<_localToGlobal( d );
+		return resolution - _localInset( d ) - 1;
+	}
+	// Begin index / end index / count of the sorted nodes in a given slice at local depth d.
+	// The depth is converted to a global depth and the slice index is shifted by the inset.
+	int _sNodesBegin( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.begin( _localToGlobal( d ) , globalSlice );
+	}
+	int _sNodesEnd  ( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.end( _localToGlobal( d ) , globalSlice );
+	}
+	int _sNodesSize ( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.size( _localToGlobal( d ) , globalSlice );
+	}
+
+	// Tests whether every coordinate of the offset lies in the half-open span [begin,end)
+	// reported by BSplineSupportSizes< FEMDegree >::InteriorSupportedSpan for this depth.
+	// Negative depths are never interiorly supported.
+	template< unsigned int FEMDegree >
+	static bool _IsInteriorlySupported( LocalDepth depth , const LocalOffset off )
+	{
+		if( depth<0 ) return false;
+		int begin , end;
+		BSplineSupportSizes< FEMDegree >::InteriorSupportedSpan( depth , begin , end );
+		for( int dim=0 ; dim<Dim ; dim++ ) if( off[dim]<begin || off[dim]>=end ) return false;
+		return true;
+	}
+	// Node-based variant: a null node is not interiorly supported; otherwise the test is run
+	// on the node's local depth and offset.
+	template< unsigned int FEMDegree >
+	bool _isInteriorlySupported( const FEMTreeNode* node ) const
+	{
+		if( node )
+		{
+			LocalDepth depth;
+			LocalOffset offset;
+			_localDepthAndOffset( node , depth , offset );
+			return _IsInteriorlySupported< FEMDegree >( depth , offset );
+		}
+		return false;
+	}
+	// Per-dimension-degree variant: forwards to BaseFEMIntegrator::IsInteriorlySupported.
+	template< unsigned int ... FEMDegrees >
+	static bool _IsInteriorlySupported( UIntPack< FEMDegrees ... > , LocalDepth depth , const LocalOffset off )
+	{
+		return BaseFEMIntegrator::IsInteriorlySupported( UIntPack< FEMDegrees ... >() , depth , off );
+	}
+	// Node-based, per-dimension-degree variant: a null node is not interiorly supported;
+	// otherwise the test is run on the node's local depth and offset.
+	template< unsigned int ... FEMDegrees >
+	bool _isInteriorlySupported( UIntPack< FEMDegrees ... > , const FEMTreeNode* node ) const
+	{
+		if( node )
+		{
+			LocalDepth depth;
+			LocalOffset offset;
+			_localDepthAndOffset( node , depth , offset );
+			return _IsInteriorlySupported< FEMDegrees ... >( UIntPack< FEMDegrees ... >() , depth , offset );
+		}
+		return false;
+	}
+	// Tests whether every coordinate of the offset lies in the half-open span [begin,end)
+	// reported by BSplineIntegrationData::InteriorOverlappedSpan for the two degrees
+	// (both instantiated with BOUNDARY_NEUMANN signatures) at this depth.
+	// Negative depths are never interiorly overlapped.
+	template< unsigned int FEMDegree1 , unsigned int FEMDegree2 >
+	static bool _IsInteriorlyOverlapped( LocalDepth depth , const LocalOffset off )
+	{
+		if( depth<0 ) return false;
+		int begin , end;
+		BSplineIntegrationData< FEMDegreeAndBType< FEMDegree1 , BOUNDARY_NEUMANN >::Signature , FEMDegreeAndBType< FEMDegree2 , BOUNDARY_NEUMANN >::Signature >::InteriorOverlappedSpan( depth , begin , end );
+		for( int dim=0 ; dim<Dim ; dim++ ) if( off[dim]<begin || off[dim]>=end ) return false;
+		return true;
+	}
+	// Node-based variant: a null node is not interiorly overlapped; otherwise the test is run
+	// on the node's local depth and offset.
+	template< unsigned int FEMDegree1 , unsigned int FEMDegree2 >
+	bool _isInteriorlyOverlapped( const FEMTreeNode* node ) const
+	{
+		if( node )
+		{
+			LocalDepth depth;
+			LocalOffset offset;
+			_localDepthAndOffset( node , depth , offset );
+			return _IsInteriorlyOverlapped< FEMDegree1 , FEMDegree2 >( depth , offset );
+		}
+		return false;
+	}
+	// Per-dimension-degree variant: forwards to BaseFEMIntegrator::IsInteriorlyOverlapped.
+	template< unsigned int ... FEMDegrees1 , unsigned int ... FEMDegrees2 >
+	static bool _IsInteriorlyOverlapped( UIntPack< FEMDegrees1 ... > , UIntPack< FEMDegrees2 ... > , LocalDepth depth , const LocalOffset off )
+	{
+		return BaseFEMIntegrator::IsInteriorlyOverlapped( UIntPack< FEMDegrees1 ... >() , UIntPack< FEMDegrees2 ... >() , depth , off );
+	}
+	// Node-based, per-dimension-degree variant: a null node is not interiorly overlapped;
+	// otherwise the test is run on the node's local depth and offset.
+	template< unsigned int ... FEMDegrees1 , unsigned int ... FEMDegrees2 >
+	bool _isInteriorlyOverlapped( UIntPack< FEMDegrees1 ... > , UIntPack< FEMDegrees2 ... > , const FEMTreeNode* node ) const
+	{
+		if( node )
+		{
+			LocalDepth depth;
+			LocalOffset offset;
+			_localDepthAndOffset( node , depth , offset );
+			return _IsInteriorlyOverlapped( UIntPack< FEMDegrees1 ... >() , UIntPack< FEMDegrees2 ... >() , depth , offset );
+		}
+		return false;
+	}
+	// Computes the start position and the width of a node from its local depth and offset.
+	// The width is 2^(-depth) for non-negative local depths and 2^(|depth|) for negative ones;
+	// each start coordinate is the local offset scaled by the width.
+	void _startAndWidth( const FEMTreeNode* node , Point< Real , Dim >& start , Real& width ) const
+	{
+		LocalDepth depth;
+		LocalOffset offset;
+		_localDepthAndOffset( node , depth , offset );
+		width = depth>=0 ? Real( 1.0 / (1<<depth) ) : Real( 1.0 * (1<<(-depth)) );
+		for( int dim=0 ; dim<Dim ; dim++ ) start[dim] = Real( offset[dim] ) * width;
+	}
+	// Computes the center position and the width of a node from its local depth and offset.
+	// The width is 2^(-depth) for non-negative local depths and 2^(|depth|) for negative ones
+	// (a local depth is negative when the node's global depth is below the depth offset);
+	// each center coordinate is (local offset + 0.5) scaled by the width.
+	// The negative case is handled explicitly, as in _startAndWidth, because shifting by a
+	// negative amount is undefined behavior.
+	void _centerAndWidth( const FEMTreeNode* node , Point< Real , Dim >& center , Real& width ) const
+	{
+		LocalDepth d ; LocalOffset off;
+		_localDepthAndOffset( node , d , off );
+		if( d>=0 ) width = Real( 1.0 / (1<<  d ) );
+		else       width = Real( 1.0 * (1<<(-d)) );
+		for( int dd=0 ; dd<Dim ; dd++ ) center[dd] = Real( off[dd] + 0.5 ) * width;
+	}
+	// Returns the index of the child cell of the node that contains point p.
+	// Bit k of the result is set when p is at or beyond the node's center along dimension k.
+	int _childIndex( const FEMTreeNode* node , Point< Real , Dim > p ) const
+	{
+		Point< Real , Dim > center;
+		Real width;
+		_centerAndWidth( node , center , width );
+		int childIndex = 0;
+		for( int dim=0 ; dim<Dim ; dim++ )
+		{
+			if( p[dim]>=center[dim] ) childIndex |= (1<<dim);
+		}
+		return childIndex;
+	}
+
+	template< unsigned int ... Degrees > void _setFullDepth( UIntPack< Degrees ... > , FEMTreeNode* node , LocalDepth depth );
+	template< unsigned int ... Degrees > void _setFullDepth( UIntPack< Degrees ... > , LocalDepth depth );
+	template< unsigned int ... Degrees > LocalDepth _getFullDepth( UIntPack< Degrees ... > , const FEMTreeNode* node ) const;
+
+public:
+	template< unsigned int ... Degrees > LocalDepth getFullDepth( UIntPack< Degrees ... > ) const;
+
+	// Public accessor for the local depth of a node.
+	LocalDepth depth( const FEMTreeNode* node ) const
+	{
+		return _localDepth( node );
+	}
+	// Public accessor for the local depth and local offset of a node.
+	void depthAndOffset( const FEMTreeNode* node , LocalDepth& depth , LocalOffset& offset ) const
+	{
+		_localDepthAndOffset( node , depth , offset );
+	}
+
+	// Total number of sorted nodes.
+	int nodesSize ( void ) const
+	{
+		return _sNodes.size( );
+	}
+	// Begin index / end index / count of the sorted nodes at local depth d.
+	int nodesBegin( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.begin( globalDepth );
+	}
+	int nodesEnd  ( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.end( globalDepth );
+	}
+	int nodesSize ( LocalDepth d ) const
+	{
+		const int globalDepth = _localToGlobal( d );
+		return _sNodes.size( globalDepth );
+	}
+	// Begin index / end index / count of the sorted nodes in a given slice at local depth d
+	// (the slice index is shifted by the inset for that depth).
+	int nodesBegin( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.begin( _localToGlobal( d ) , globalSlice );
+	}
+	int nodesEnd  ( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.end( _localToGlobal( d ) , globalSlice );
+	}
+	int nodesSize ( LocalDepth d , int slice ) const
+	{
+		const int globalSlice = slice + _localInset( d );
+		return _sNodes.size( _localToGlobal( d ) , globalSlice );
+	}
+	// Returns the sorted node stored at flat index idx (no bounds check is performed here).
+	const FEMTreeNode* node( int idx ) const
+	{
+		return _sNodes.treeNodes[idx];
+	}
+	// Writes the center and width of the sorted node at flat index idx.
+	void centerAndWidth( int idx , Point< Real , Dim >& center , Real& width ) const
+	{
+		const FEMTreeNode* treeNode = _sNodes.treeNodes[idx];
+		_centerAndWidth( treeNode , center , width );
+	}
+	// Writes the start position and width of the sorted node at flat index idx.
+	// Note: despite its name, the `center` parameter receives the start position
+	// (it is forwarded as the `start` argument of _startAndWidth).
+	void  startAndWidth( int idx , Point< Real , Dim >& center , Real& width ) const
+	{
+		const FEMTreeNode* treeNode = _sNodes.treeNodes[idx];
+		_startAndWidth( treeNode , center , width );
+	}
+
+protected:
+	/////////////////////////////////////
+	// System construction code        //
+	// MultiGridFEMTreeData.System.inl //
+	/////////////////////////////////////
+public:
+	template< unsigned int ... FEMSigs > void setMultiColorIndices( UIntPack< FEMSigs ... > , int depth , std::vector< std::vector< int > >& indices ) const;
+protected:
+	template< unsigned int ... FEMSigs > void _setMultiColorIndices( UIntPack< FEMSigs ... > , int start , int end , std::vector< std::vector< int > >& indices ) const;
+
+	// Statistics record passed by reference to the solver routines.
+	// NOTE(review): field meanings below are inferred from their names -- confirm against the solver implementation.
+	struct _SolverStats
+	{
+		// presumably elapsed times of the individual solver phases
+		double constraintUpdateTime;
+		double systemTime;
+		double solveTime;
+		// presumably squared norms of the right-hand side and of the residual before / after solving
+		double bNorm2;
+		double inRNorm2;
+		double outRNorm2;
+	};
+	// Overload for two or more interpolation infos:
+	// processes the first info, then forwards the remaining ones.
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) >::type _addPointValues
+	(
+		UIntPack< FEMSigs ... > ,
+		StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pointValues ,
+		const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ,
+		const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ,
+		const InterpolationInfo< T , PointD >* iInfo ,
+		const InterpolationInfo< T , PointDs >* ... iInfos
+	) const
+	{
+		_addPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , bsData , iInfo );
+		_addPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , bsData , iInfos... );
+	}
+	template< unsigned int ... FEMSigs >
+	void _addPointValues( UIntPack< FEMSigs ... > , StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ) const { }
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+	void _addPointValues( UIntPack< FEMSigs ... > , StaticWindow< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* interpolationInfo ) const;
+
+	// Overload for two or more interpolation infos:
+	// processes the first info, then forwards the remaining ones.
+	// The overload is enabled whenever at least one info follows the first (sizeof...(PointDs)!=0),
+	// matching the other variadic helpers in this class. With the previous ">1" condition a call
+	// with exactly two infos matched no overload, so the recursion could never terminate.
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) >::type _addProlongedPointValues( UIntPack< FEMSigs ... > , WindowSlice< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* iInfo , const InterpolationInfo< T , PointDs >* ... iInfos ) const
+	{
+		_addProlongedPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , pNeighbors , bsData , iInfo ) , _addProlongedPointValues( UIntPack< FEMSigs ... >() , pointValues , neighbors , pNeighbors , bsData , iInfos... );
+	}
+	template< unsigned int ... FEMSigs > void _addProlongedPointValues( UIntPack< FEMSigs ... > , WindowSlice< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ) const { }
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+	void _addProlongedPointValues( UIntPack< FEMSigs ... > , WindowSlice< Real , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > > pointValues , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* iInfo ) const;
+
+	// Overload for two or more interpolation infos:
+	// processes the first info, then forwards the remaining ones.
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) >::type _setPointValuesFromProlongedSolution
+	(
+		LocalDepth highDepth ,
+		const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ,
+		ConstPointer( T ) prolongedSolution ,
+		InterpolationInfo< T , PointD >* iInfo ,
+		InterpolationInfo< T , PointDs >* ... iInfos
+	) const
+	{
+		_setPointValuesFromProlongedSolution( highDepth , bsData , prolongedSolution , iInfo );
+		_setPointValuesFromProlongedSolution( highDepth , bsData , prolongedSolution , iInfos... );
+	}
+	template< unsigned int ... FEMSigs , typename T > void _setPointValuesFromProlongedSolution( LocalDepth highDepth , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution ) const { }
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+	void _setPointValuesFromProlongedSolution( LocalDepth highDepth , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , InterpolationInfo< T , PointD >* interpolationInfo ) const;
+
+	// Overload for two or more interpolation infos:
+	// returns the sum of the constraint from the first info and the constraint from the remaining ones.
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) , T >::type _getInterpolationConstraintFromProlongedSolution
+	(
+		const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ,
+		const FEMTreeNode* node ,
+		ConstPointer( T ) prolongedSolution ,
+		const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ,
+		const InterpolationInfo< T , PointD >* iInfo ,
+		const InterpolationInfo< T , PointDs >* ... iInfos
+	) const
+	{
+		return
+			_getInterpolationConstraintFromProlongedSolution( neighbors , node , prolongedSolution , bsData , iInfo ) +
+			_getInterpolationConstraintFromProlongedSolution( neighbors , node , prolongedSolution , bsData , iInfos... );
+	}
+	template< unsigned int ... FEMSigs , typename T > T _getInterpolationConstraintFromProlongedSolution( const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const FEMTreeNode* node , ConstPointer( T ) prolongedSolution , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ) const { return T(); }
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+	T _getInterpolationConstraintFromProlongedSolution( const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const FEMTreeNode* node , ConstPointer( T ) prolongedSolution , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointD >* iInfo ) const;
+
+	// Overload for two or more interpolation infos:
+	// processes the first info, then forwards the remaining ones.
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) >::type _updateRestrictedInterpolationConstraints
+	(
+		const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ,
+		LocalDepth highDepth ,
+		ConstPointer( T ) solution ,
+		Pointer( T ) cumulativeConstraints ,
+		const InterpolationInfo< T , PointD >* iInfo ,
+		const InterpolationInfo< T , PointDs >* ... iInfos
+	) const
+	{
+		_updateRestrictedInterpolationConstraints( bsData , highDepth , solution , cumulativeConstraints , iInfo );
+		_updateRestrictedInterpolationConstraints( bsData , highDepth , solution , cumulativeConstraints , iInfos... );
+	}
+	// Overload for the case of no interpolation infos: there is nothing to update.
+	// The evaluator is taken by const reference, like the sibling overloads, so that a const
+	// evaluator can also be passed when the list of interpolation infos is empty.
+	template< unsigned int ... FEMSigs , typename T > void _updateRestrictedInterpolationConstraints( const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth highDepth , ConstPointer( T ) solution , Pointer( T ) cumulativeConstraints ) const { ; }
+	template< unsigned int ... FEMSigs , typename T , unsigned int PointD >
+	void _updateRestrictedInterpolationConstraints( const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth highDepth , ConstPointer( T ) solution , Pointer( T ) cumulativeConstraints , const InterpolationInfo< T , PointD >* interpolationInfo ) const;
+
+	template< unsigned int FEMDegree1 , unsigned int FEMDegree2 > static void _SetParentOverlapBounds( const FEMTreeNode* node , int start[Dim] , int end[Dim] );
+	template< unsigned int FEMDegree1 , unsigned int FEMDegree2 > static void _SetParentOverlapBounds( int cIdx , int start[Dim] , int end[Dim] );
+	// Sets the parent overlap bounds for a node by forwarding the depth and offset reported by
+	// the node itself (not the local values) to BaseFEMIntegrator::ParentOverlapBounds.
+	// Does nothing (start/end are left untouched) when the node is null.
+	template< unsigned int ... FEMDegrees1 , unsigned int ... FEMDegrees2 >
+	static void _SetParentOverlapBounds( UIntPack< FEMDegrees1 ... > , UIntPack< FEMDegrees2 ... > , const FEMTreeNode* node , int start[Dim] , int end[Dim] )
+	{
+		if( !node ) return;
+		int depth , offset[Dim];
+		node->depthAndOffset( depth , offset );
+		BaseFEMIntegrator::template ParentOverlapBounds( UIntPack< FEMDegrees1 ... >() , UIntPack< FEMDegrees2 ... >() , depth , offset , start , end );
+	}
+	// Sets the parent overlap bounds for a child index by forwarding to
+	// BaseFEMIntegrator::ParentOverlapBounds.
+	template< unsigned int ... FEMDegrees1 , unsigned int ... FEMDegrees2 >
+	static void _SetParentOverlapBounds
+	(
+		UIntPack< FEMDegrees1 ... > ,
+		UIntPack< FEMDegrees2 ... > ,
+		int cIdx ,
+		int start[Dim] ,
+		int end[Dim]
+	)
+	{
+		BaseFEMIntegrator::template ParentOverlapBounds( UIntPack< FEMDegrees1 ... >() , UIntPack< FEMDegrees2 ... >() , cIdx , start , end );
+	}
+
+	template< unsigned int ... FEMSigs >
+	int _getProlongedMatrixRowSize( const FEMTreeNode* node , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors ) const;
+#if defined( __GNUC__ ) && __GNUC__ < 5
+	// GCC versions before 5: the declaration takes an extra (unnamed) UIntPack< FEMSigs ... > tag argument,
+	// and a compile-time warning is emitted.
+	#warning "you've got me gcc version<5"
+		template< unsigned int ... FEMSigs >
+	int _getMatrixRowSize( UIntPack< FEMSigs ... > , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ) const;
+#else // !__GNUC__ || __GNUC__ >=5
+	// All other compilers: the declaration takes only the neighbors argument.
+	template< unsigned int ... FEMSigs >
+	int _getMatrixRowSize( const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors ) const;
+#endif // __GNUC__ && __GNUC__ < 5
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	T _setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , size_t idx , SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size > &M , int offset , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	T _setMatrixRowAndGetConstraintFromProlongation( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , Pointer( MatrixEntry< Real > ) row , int offset , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const CCStencil< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) prolongedSolution , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	int _setProlongedMatrixRow( const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , Pointer( MatrixEntry< Real > ) row , int offset , const DynamicWindow< double , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& stencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+
+	// Updates the constraints @(depth) based on the solution coefficients @(depth-1)
+	template< unsigned int ... FEMSigs , typename T , unsigned int ... PointDs >
+	T _getConstraintFromProlongedSolution( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& neighbors , const typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& pNeighbors , const FEMTreeNode* node , ConstPointer( T ) prolongedSolution , const DynamicWindow< double , UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >& stencil , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , typename SORWeights , unsigned int ... PointDs >
+	int _solveFullSystemGS( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , SORWeights sorWeights , _SolverStats& stats , bool computeNorms , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , typename SORWeights , unsigned int ... PointDs >
+	int _solveSlicedSystemGS( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , unsigned int sliceBlockSize , SORWeights sorWeights , _SolverStats& stats , bool computeNorms , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+	// Dispatches to the sliced or the full solver depending on the `sliced` flag and returns its result.
+	// `sliceBlockSize` is only forwarded to the sliced solver.
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , typename SORWeights , unsigned int ... PointDs >
+	int _solveSystemGS
+	(
+		UIntPack< FEMSigs ... > ,
+		bool sliced ,
+		const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F ,
+		const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData ,
+		LocalDepth depth ,
+		Pointer( T ) solution ,
+		ConstPointer( T ) prolongedSolution ,
+		ConstPointer( T ) constraints ,
+		TDotT Dot ,
+		int iters ,
+		bool coarseToFine ,
+		unsigned int sliceBlockSize ,
+		SORWeights sorWeights ,
+		_SolverStats& stats ,
+		bool computeNorms ,
+		const InterpolationInfo< T , PointDs >* ... interpolationInfo
+	) const
+	{
+		if( !sliced ) return _solveFullSystemGS( UIntPack< FEMSigs ... >() , F , bsData , depth , solution , prolongedSolution , constraints , Dot , iters , coarseToFine , sorWeights , stats , computeNorms , interpolationInfo ... );
+		return _solveSlicedSystemGS( UIntPack< FEMSigs ... >() , F , bsData , depth , solution , prolongedSolution , constraints , Dot , iters , coarseToFine , sliceBlockSize , sorWeights , stats , computeNorms , interpolationInfo ... );
+	}
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+	int _solveSystemCG( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) prolongedSolution , ConstPointer( T ) constraints , TDotT Dot , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms , double cgAccuracy , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+	void _solveRegularMG( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , Pointer( T ) solution , ConstPointer( T ) constraints , TDotT Dot , int vCycles , int iters , _SolverStats& stats , bool computeNorms , double cgAccuracy , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+
+	// Updates the cumulative integral constraints @(depth-1) based on the change in solution coefficients @(depth)
+	template< unsigned int ... FEMSigs , typename T >
+	void _updateRestrictedIntegralConstraints( UIntPack< FEMSigs ... > , const typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth highDepth , ConstPointer( T ) solution , Pointer( T ) cumulativeConstraints ) const;
+
+	template< unsigned int PointD , typename T , unsigned int ... FEMSigs >
+	CumulativeDerivativeValues< T , Dim , PointD > _coarserFunctionValues( UIntPack< FEMSigs ... > , Point< Real , Dim > p , const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) coefficients ) const;
+	template< unsigned int PointD , typename T , unsigned int ... FEMSigs >
+	CumulativeDerivativeValues< T , Dim , PointD >   _finerFunctionValues( UIntPack< FEMSigs ... > , Point< Real , Dim > p , const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , ConstPointer( T ) coefficients ) const;
+
+	template< unsigned int ... FEMSigs , typename T , unsigned int ... PointDs >
+	int _getSliceMatrixAndProlongationConstraints( UIntPack< FEMSigs ... > , const BaseSystem< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , SparseMatrix< Real , int , WindowSize< UIntPack< BSplineOverlapSizes< FEMSignature< FEMSigs >::Degree >::OverlapSize ... > >::Size >& matrix , Pointer( Real ) diagonalR , const PointEvaluator< UIntPack< FEMSigs ... > , UIntPack< FEMSignature< FEMSigs >::Degree ... > >& bsData , LocalDepth depth , int nBegin , int nEnd , ConstPointer( T ) prolongedSolution , Pointer( T ) constraints , const CCStencil < UIntPack< FEMSignature< FEMSigs >::Degree ... > >& ccStencil , const PCStencils< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& pcStencils , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;
+
+	// Down samples constraints @(depth) to constraints @(depth-1)
+	template< class C , unsigned ... Degrees , unsigned int ... FEMSigs > void _downSample( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< Degrees ... > >& RP , LocalDepth highDepth , Pointer( C ) constraints ) const;
+	// Up samples coefficients @(depth-1) to coefficients @(depth)
+	template< class C , unsigned ... Degrees , unsigned int ... FEMSigs > void _upSample( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template RestrictionProlongation< UIntPack< Degrees ... > >& RP , LocalDepth highDepth , Pointer( C ) coefficients ) const;
+
+	template< bool XMajor , class C , unsigned int ... FEMSigs > static void _RegularGridUpSample( UIntPack< FEMSigs ... > ,                                                                                           LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients );
+	template< bool XMajor , class C , unsigned int ... FEMSigs > static void _RegularGridUpSample( UIntPack< FEMSigs ... > , const int lowBegin[] , const int lowEnd[] , const int highBegin[] , const int highEnd[] , LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients );
+public:
+	template< class C , unsigned int ... FEMSigs > DenseNodeData< C , UIntPack< FEMSigs ... > > coarseCoefficients( const  DenseNodeData< C , UIntPack< FEMSigs ... > >& coefficients ) const;
+	template< class C , unsigned int ... FEMSigs > DenseNodeData< C , UIntPack< FEMSigs ... > > coarseCoefficients( const SparseNodeData< C , UIntPack< FEMSigs ... > >& coefficients ) const;
+
+	// For each (valid) fem node, compute the ratio of the sum of active prolongation weights to the sum of total prolongation weights
+	// If the prolongToChildren flag is set, then these weights are pushed to the children by computing the ratio of the prolongation of the above weights to the prolongation of unity weights 
+	template< unsigned int ... FEMSigs > DenseNodeData< Real , UIntPack< FEMSigs ... > > prolongationWeights( UIntPack< FEMSigs ... > , bool prolongToChildren ) const;
+
+	// For each (valid) fem node, compute the ratio of the integral of the basis function over the valid space nodes to the integral of the basis function
+	template< unsigned int ... FEMSigs > DenseNodeData< Real , UIntPack< FEMSigs ... > > supportWeights( UIntPack< FEMSigs ... > ) const;
+protected:
+
+	//////////////////////////////////////////////
+	// Code for splatting point-sample data     //
+	// MultiGridFEMTreeData.WeightedSamples.inl //
+	//////////////////////////////////////////////
+	template< unsigned int WeightDegree >
+	void _addWeightContribution( DensityEstimator< WeightDegree >& densityWeights , FEMTreeNode* node , Point< Real , Dim > position , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , Real weight=Real(1.0) );
+	template< unsigned int WeightDegree , class PointSupportKey >
+	Real _getSamplesPerNode( const DensityEstimator< WeightDegree >& densityWeights , const FEMTreeNode* node , Point< Real , Dim > position , PointSupportKey& weightKey ) const;
+	template< unsigned int WeightDegree , class WeightKey >
+	void _getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , const FEMTreeNode* node , Point< Real , Dim > position , WeightKey& weightKey , Real& depth , Real& weight ) const;
+	template< unsigned int WeightDegree , class WeightKey >
+	void _getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , Point< Real , Dim > position , WeightKey& weightKey , Real& depth , Real& weight ) const;
+	// Splatting/evaluation helpers (declarations only). CreateNodes is a compile-time flag; the overloads differ in whether they take a start node, a density estimator (by reference or by pointer), and explicit depth bounds.
+	template< bool CreateNodes ,                             class V , unsigned int ... DataSigs > void      _splatPointData( FEMTreeNode* node ,                                                          Point< Real , Dim > point , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& data ,                                                                         PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey                                                                        );
+	template< bool CreateNodes , unsigned int WeightDegree , class V , unsigned int ... DataSigs > Real      _splatPointData( const DensityEstimator< WeightDegree >& densityWeights ,                     Point< Real , Dim > point , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& data , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey , LocalDepth minDepth , LocalDepth maxDepth , int dim , Real depthBias );
+	template< bool CreateNodes , unsigned int WeightDegree , class V , unsigned int ... DataSigs > Real _multiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , FEMTreeNode* node , Point< Real , Dim > point , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& data , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , PointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey ,                                             int dim                  );
+	template< unsigned int WeightDegree , class V , unsigned int ... DataSigs > Real _nearestMultiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , FEMTreeNode* node , Point< Real , Dim > point , V v , SparseNodeData< V , UIntPack< DataSigs ... > >& data , PointSupportKey< IsotropicUIntPack< Dim , WeightDegree > >& weightKey , int dim=Dim );	// the only splat helper with a defaulted dim and without a CreateNodes flag
+	template< class V , class Coefficients , unsigned int D , unsigned int ... DataSigs > V _evaluate( const Coefficients& coefficients , Point< Real , Dim > p , const PointEvaluator< UIntPack< DataSigs ... > , IsotropicUIntPack< Dim , D > >& pointEvaluator , const ConstPointSupportKey< UIntPack< FEMSignature< DataSigs >::Degree ... > >& dataKey ) const;	// const member returning V by value
+public:
+	template< bool XMajor , class V , unsigned int ... DataSigs > Pointer( V ) regularGridEvaluate( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , int& res , LocalDepth depth=-1 , bool primal=false ) const;	// res is a non-const int& (presumably receives the grid resolution — TODO confirm); depth defaults to -1, primal to false
+	template< bool XMajor , class V , unsigned int ... DataSigs > Pointer( V ) regularGridUpSample( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , LocalDepth depth=-1 ) const;	// overload without explicit bounds
+	template< bool XMajor , class V , unsigned int ... DataSigs > Pointer( V ) regularGridUpSample( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , const int begin[Dim] , const int end[Dim] , LocalDepth depth=-1 ) const;	// overload taking per-axis integer begin/end bounds
+	template< class V , unsigned int ... DataSigs > V average( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients ) const;	// overload without explicit bounds
+	template< class V , unsigned int ... DataSigs > V average( const DenseNodeData< V , UIntPack< DataSigs ... > >& coefficients , const Real begin[Dim] , const Real end[Dim] ) const;	// overload taking per-axis Real begin/end bounds
+	template< typename T > struct HasNormalDataFunctor{};	// primary template is intentionally empty; only the UIntPack specialization is usable
+	template< unsigned int ... NormalSigs >
+	struct HasNormalDataFunctor< UIntPack< NormalSigs ... > >
+	{
+		// Predicate: true iff the node, or any node in its subtree, stores a normal with a non-zero component
+		const SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > >& normalInfo;
+		HasNormalDataFunctor( const SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > >& ni ) : normalInfo( ni ){ }
+		bool operator() ( const FEMTreeNode* node ) const
+		{
+			// Test the node's own entry first (the lookup may yield a null pointer)
+			if( const Point< Real , Dim >* stored = normalInfo( node ) )
+				for( int axis=0 ; axis<Dim ; axis++ ) if( (*stored)[axis]!=0 ) return true;
+			if( !node->children ) return false;
+			// Otherwise recurse into each of the 1<<Dim children
+			for( int child=0 ; child<(1<<Dim) ; child++ ) if( (*this)( node->children + child ) ) return true;
+			return false;
+		}
+	};
+	struct TrivialHasDataFunctor{ bool operator() ( const FEMTreeNode* ) const { return true; } };	// predicate that accepts every node (the argument is ignored)
+protected:
+	// [NOTE] The input/output for _setInterpolationInfoFromChildren is pre-scaled by weight. The four _densify* declarations below come in pairs: without and with per-sample Data (sampleData).
+	template< typename T > bool _setInterpolationInfoFromChildren( FEMTreeNode* node , SparseNodeData< T , IsotropicUIntPack< Dim , FEMTrivialSignature > >& iInfo ) const;
+	template< typename T ,                 unsigned int PointD , typename ConstraintDual > SparseNodeData< DualPointInfo       < Dim , Real ,        T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > _densifyInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples ,                                   ConstraintDual constraintDual , int adaptiveExponent ) const;
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual > SparseNodeData< DualPointAndDataInfo< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > _densifyInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , int adaptiveExponent ) const;
+	template< typename T ,                 unsigned int PointD , typename ConstraintDual > SparseNodeData< DualPointInfoBrood       < Dim , Real ,        T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > _densifyChildInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples ,                                   ConstraintDual constraintDual , bool noRescale ) const;	// the "Child" variants return *Brood info types and take noRescale instead of adaptiveExponent
+	template< typename T , typename Data , unsigned int PointD , typename ConstraintDual > SparseNodeData< DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > _densifyChildInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , bool noRescale ) const;
+	// Flag-setting helpers are all const members (NOTE(review): presumably they update mutable per-node flags — confirm in the definitions); _clipTree is the only non-const member of this group.
+	void _setSpaceValidityFlags( void ) const;
+	template< unsigned int ... FEMSigs1 > void _setFEM1ValidityFlags( UIntPack< FEMSigs1 ... > ) const;
+	template< unsigned int ... FEMSigs2 > void _setFEM2ValidityFlags( UIntPack< FEMSigs2 ... > ) const;
+	template< unsigned int ... FEMSigs  > void _setRefinabilityFlags( UIntPack< FEMSigs  ... > ) const;
+	template< class HasDataFunctor > void _clipTree( const HasDataFunctor& f , LocalDepth fullDepth );
+public:
+	template< unsigned int PointD , unsigned int ... FEMSigs > SparseNodeData< CumulativeDerivativeValues< Real , Dim , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > leafValues( const DenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients , int maxDepth=-1 ) const;	// PointD selects the CumulativeDerivativeValues type stored per node; maxDepth defaults to -1 (NOTE(review): presumably "no depth limit" — confirm)
+protected:
+
+	/////////////////////////////////////
+	// Evaluation Methods              //
+	// MultiGridFEMTreeData.Evaluation //
+	/////////////////////////////////////
+	static const unsigned int CHILDREN = 1<<Dim;	// 2^Dim; used below to size the per-child stencil arrays
+	template< typename Pack , unsigned int PointD > struct _Evaluator{ };	// primary template is empty; only the UIntPack specialization below has members
+	template< unsigned int ... FEMSigs , unsigned int PointD >
+	struct _Evaluator< UIntPack< FEMSigs ... > , PointD >
+	{
+		static_assert( Dim == sizeof...(FEMSigs) , "[ERROR] Number of signatures doesn't match dimension" );
+		// Windows of derivative values. CenterStencil and CornerStencil are the same type; BCornerStencil is sized by BCornerSize+1 per axis.
+		typedef DynamicWindow< CumulativeDerivativeValues< double , Dim , PointD > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > > CenterStencil;
+		typedef DynamicWindow< CumulativeDerivativeValues< double , Dim , PointD > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::SupportSize ... > > CornerStencil;
+		typedef DynamicWindow< CumulativeDerivativeValues< double , Dim , PointD > , UIntPack< ( BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::BCornerSize + 1 ) ... > > BCornerStencil;
+		// One per-signature (child) evaluator per axis, bundled in a tuple and accessed below with std::get< I >
+		typedef std::tuple< typename BSplineEvaluationData< FEMSigs >::template      Evaluator< PointD > ... >      Evaluators;
+		typedef std::tuple< typename BSplineEvaluationData< FEMSigs >::template ChildEvaluator< PointD > ... > ChildEvaluators;
+		struct StencilData
+		{
+			CenterStencil ccCenterStencil , pcCenterStencils[CHILDREN];
+			CornerStencil ccCornerStencil[CHILDREN] , pcCornerStencils[CHILDREN][CHILDREN];
+			BCornerStencil ccBCornerStencil[CHILDREN] , pcBCornerStencils[CHILDREN][CHILDREN];
+		};
+		Pointer( StencilData ) stencilData;
+		Pointer(      Evaluators )      evaluators;
+		Pointer( ChildEvaluators ) childEvaluators;
+		// NOTE(review): the destructor releases all four pointers but no copy constructor/assignment is declared, so a copied _Evaluator would presumably release them twice — confirm instances are never copied
+		void set( LocalDepth depth );
+		_Evaluator( void ){ _pointEvaluator = NULL ; stencilData = NullPointer( StencilData ) , evaluators = NullPointer( Evaluators ) , childEvaluators = NullPointer( ChildEvaluators ); }
+		~_Evaluator( void ){ if( _pointEvaluator ) delete _pointEvaluator , _pointEvaluator = NULL ; if( stencilData ) DeletePointer( stencilData ) ; if( evaluators ) DeletePointer( evaluators ) ; if( childEvaluators ) DeletePointer( childEvaluators ); }
+	protected:
+		enum _CenterOffset{ CENTER=-1 , BACK=0 , FRONT=1 };	// CENTER selects centerValue; BACK/FRONT are added to the index passed to cornerValue (see _setDValues)
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< double , Dim , _PointD >       _values( unsigned int d , const int fIdx[Dim] , const int idx[Dim] , const _CenterOffset off[Dim] , bool parentChild ) const;
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< double , Dim , _PointD > _centerValues( unsigned int d , const int fIdx[Dim] , const int idx[Dim] ,                                bool parentChild ) const;
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< double , Dim , _PointD > _cornerValues( unsigned int d , const int fIdx[Dim] , const int idx[Dim] , int corner ,                   bool parentChild ) const;
+		template< unsigned int _PointD=PointD , unsigned int I=0 > typename std::enable_if< I==Dim >::type _setDValues( unsigned int d , const int fIdx[] , const int cIdx[] , const _CenterOffset off[] , bool pc , double dValues[][_PointD+1] ) const{ }	// recursion terminator (I==Dim): nothing left to fill
+		template< unsigned int _PointD=PointD , unsigned int I=0 > typename std::enable_if< I< Dim >::type _setDValues( unsigned int d , const int fIdx[] , const int cIdx[] , const _CenterOffset off[] , bool pc , double dValues[][_PointD+1] ) const
+		{	// fills dValues[I][0.._PointD] for axis I, then recurses on axis I+1; pc selects childEvaluators over evaluators
+			if( pc ) for( int dd=0 ; dd<=_PointD ; dd++ ) dValues[I][dd] = off[I]==CENTER ? std::get< I >( childEvaluators[d] ).centerValue( fIdx[I] , cIdx[I] , dd ) : std::get< I >( childEvaluators[d] ).cornerValue( fIdx[I] , cIdx[I]+off[I] , dd );
+			else     for( int dd=0 ; dd<=_PointD ; dd++ ) dValues[I][dd] = off[I]==CENTER ? std::get< I >(      evaluators[d] ).centerValue( fIdx[I] , cIdx[I] , dd ) : std::get< I >(      evaluators[d] ).cornerValue( fIdx[I] , cIdx[I]+off[I] , dd );
+			_setDValues< _PointD , I+1 >( d , fIdx , cIdx , off , pc , dValues );
+		}
+		// Compile-time recursion over the axes: sets evaluators[d] for d in [0,maxDepth] and childEvaluators[d] for d in [1,maxDepth] (the latter with depth argument d-1)
+		template< unsigned int I=0 > typename std::enable_if< I==Dim >::type _setEvaluators( unsigned int maxDepth ){ }
+		template< unsigned int I=0 > typename std::enable_if< I< Dim >::type _setEvaluators( unsigned int maxDepth )
+		{
+			static const unsigned int FEMSig = UIntPack< FEMSigs ... >::template Get< I >();
+			for( unsigned int d=0 ; d<=maxDepth ; d++ ) BSplineEvaluationData< FEMSig >::     SetEvaluator( std::template get< I >(      evaluators[d] ) , d   );
+			for( unsigned int d=1 ; d<=maxDepth ; d++ ) BSplineEvaluationData< FEMSig >::SetChildEvaluator( std::template get< I >( childEvaluators[d] ) , d-1 );
+			_setEvaluators< I+1 >( maxDepth );
+		}
+		typename FEMIntegrator::template PointEvaluator< UIntPack< FEMSigs ... > , IsotropicUIntPack< Dim , PointD > >* _pointEvaluator;	// owned: deleted in the destructor
+		friend FEMTree;
+	};
+	template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >	// _PointD is the order of the returned values; PointD is the order of the evaluator that is passed in
+	CumulativeDerivativeValues< V , Dim , _PointD > _getCenterValues( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node ,                         ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const;
+	template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >	// corner overload keyed by a ConstPointSupportKey
+	CumulativeDerivativeValues< V , Dim , _PointD > _getCornerValues( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , int corner            , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const;
+	template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >	// arbitrary-point overload: takes the position p and has no isInterior flag
+	CumulativeDerivativeValues< V , Dim , _PointD > _getValues      ( const ConstPointSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , Point< Real , Dim > p , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth ) const;
+	template< class V , unsigned int _PointD , unsigned int ... FEMSigs , unsigned int PointD >	// corner overload keyed by a ConstCornerSupportKey (otherwise the same parameters as the point-key version)
+	CumulativeDerivativeValues< V , Dim , _PointD > _getCornerValues( const ConstCornerSupportKey< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& neighborKey , const FEMTreeNode* node , int corner            , ConstPointer( V ) solution , ConstPointer( V ) coarseSolution , const _Evaluator< UIntPack< FEMSigs ... > , PointD >& evaluator , int maxDepth , bool isInterior ) const;
+	template< unsigned int ... SupportSizes >
+	struct CornerLoopData
+	{
+		typedef UIntPack< SupportSizes ... > _SupportSizes;
+		// Per-axis window extents: declared only (an in-class initializer `{ SupportSizes ... }` was left disabled), so a definition is expected outside the class
+		static const unsigned int supportSizes[];
+		unsigned int ccSize[1<<Dim] , pcSize[1<<Dim][1<<Dim];
+		unsigned int ccIndices[1<<Dim]        [ WindowSize< _SupportSizes >::Size ];
+		unsigned int pcIndices[1<<Dim][1<<Dim][ WindowSize< _SupportSizes >::Size ];
+		// Tabulates, per corner (cc*) and per corner pair (pc*), the GetWindowIndex() value of every cell of a clipped sub-window
+		CornerLoopData( void )
+		{
+			int lo[Dim] , hi[Dim] , pairLo[Dim] , pairHi[Dim];
+			for( int corner=0 ; corner<(1<<Dim) ; corner++ )
+			{
+				// Clip one end of each axis: drop the first slot when the corner's bit is set, the last slot otherwise
+				for( int axis=0 ; axis<Dim ; axis++ )
+				{
+					const bool bitSet = ( (corner>>axis) & 1 )!=0;
+					lo[axis] = bitSet ? 1 : 0 , hi[axis] = (int)supportSizes[axis] - ( bitSet ? 0 : 1 );
+				}
+				unsigned int idx[Dim];
+				ccSize[corner] = 0;
+				WindowLoop< Dim >::Run
+				(
+					lo , hi ,
+					[&]( int d , int i ){ idx[d] = i; } ,
+					[&]( void ){ ccIndices[corner][ ccSize[corner]++ ] = GetWindowIndex( _SupportSizes() , idx ); }
+				);
+				for( int other=0 ; other<(1<<Dim) ; other++ )
+				{
+					// Axes on which the two corners' bits differ keep the full extent; matching axes reuse the clipped range
+					for( int axis=0 ; axis<Dim ; axis++ )
+					{
+						const bool differ = ( (other>>axis) & 1 ) != ( (corner>>axis) & 1 );
+						pairLo[axis] = differ ? 0 : lo[axis] , pairHi[axis] = differ ? (int)supportSizes[axis] : hi[axis];
+					}
+					unsigned int pairIdx[Dim];
+					pcSize[corner][other] = 0;
+					WindowLoop< Dim >::Run
+					(
+						pairLo , pairHi ,
+						[&]( int d , int i ){ pairIdx[d] = i; } ,
+						[&]( void ){ pcIndices[corner][other][ pcSize[corner][other]++ ] = GetWindowIndex( _SupportSizes() , pairIdx ); }
+					);
+				}
+			}
+		}
+	};
+public:
+	template< typename Pack , unsigned int PointD , typename T > struct _MultiThreadedEvaluator{ };	// primary template is empty; see the UIntPack specialization below
+	template< unsigned int ... FEMSigs , unsigned int PointD , typename T >
+	struct _MultiThreadedEvaluator< UIntPack< FEMSigs ... > , PointD , T >
+	{
+		typedef UIntPack< FEMSigs ... > FEMSignatures;
+		typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > FEMDegrees;
+		const FEMTree* _tree;
+		int _threads;
+		std::vector< ConstPointSupportKey< FEMDegrees > > _pointNeighborKeys;	// the query methods take a thread index — presumably one key per thread; confirm in the constructor
+		std::vector< ConstCornerSupportKey< FEMDegrees > > _cornerNeighborKeys;
+		_Evaluator< FEMSignatures , PointD > _evaluator;
+		const DenseNodeData< T , FEMSignatures >& _coefficients;	// reference member: the caller's coefficients must outlive this evaluator
+		DenseNodeData< T , FEMSignatures > _coarseCoefficients;	// held by value, unlike _coefficients
+	public:	// redundant inside a struct (members are already public)
+		_MultiThreadedEvaluator( const FEMTree* tree , const DenseNodeData< T , FEMSignatures >& coefficients , int threads=omp_get_max_threads() );
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< T , Dim , _PointD > values( Point< Real , Dim > p , int thread=0 , const FEMTreeNode* node=NULL );	// node is optional (defaults to NULL)
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< T , Dim , _PointD > centerValues( const FEMTreeNode* node , int thread=0 );
+		template< unsigned int _PointD=PointD > CumulativeDerivativeValues< T , Dim , _PointD > cornerValues( const FEMTreeNode* node , int corner , int thread=0 );
+	};
+	template< typename Pack , unsigned int PointD , typename T=Real > using MultiThreadedEvaluator = _MultiThreadedEvaluator< Pack , PointD , T >;	// public alias that defaults T to Real
+	template< unsigned int DensityDegree >	// declarations only: the constructor and weight() are defined elsewhere
+	struct MultiThreadedWeightEvaluator
+	{
+		const FEMTree* _tree;
+		int _threads;
+		std::vector< ConstPointSupportKey< IsotropicUIntPack< Dim , DensityDegree > > > _neighborKeys;	// weight() takes a thread index — presumably one key per thread; confirm in the constructor
+		const DensityEstimator< DensityDegree >& _density;	// reference member: the caller's estimator must outlive this evaluator
+	public:	// redundant inside a struct (members are already public)
+		MultiThreadedWeightEvaluator( const FEMTree* tree , const DensityEstimator< DensityDegree >& density , int threads=omp_get_max_threads() );
+		Real weight( Point< Real , Dim > p , int thread=0 );
+	};
+
+	// Static memory-usage counters, followed by helpers that apply one index map to a heterogeneous list of node-data pointers
+	static double _MaxMemoryUsage , _LocalMemoryUsage;
+	void _reorderDenseOrSparseNodeData( const int* , size_t ){ }	// recursion terminator: no containers left
+	template< class Data , unsigned int ... FEMSigs , class ... DenseOrSparseNodeData >
+	void _reorderDenseOrSparseNodeData( const int* map , size_t sz , SparseNodeData< Data , UIntPack< FEMSigs ... > >* sData , DenseOrSparseNodeData* ... data )
+	{
+		if( sData!=NULL ) sData->_remapIndices( map , static_cast< int >( sz ) );	// null entries are skipped; the size is narrowed to int for the sparse container
+		_reorderDenseOrSparseNodeData( map , sz , data ... );
+	}
+	template< class Data , unsigned int ... FEMSigs , class ... DenseOrSparseNodeData >
+	void _reorderDenseOrSparseNodeData( const int* map , size_t sz , DenseNodeData< Data , UIntPack< FEMSigs ... > >* dData , DenseOrSparseNodeData* ... data )
+	{
+		if( dData!=NULL ) dData->_remapIndices( map , sz );	// null entries are skipped; the dense container receives the size_t unchanged
+		_reorderDenseOrSparseNodeData( map , sz , data ... );
+	}
+public:
+	static double MaxMemoryUsage(){ return _MaxMemoryUsage; }	// read-only accessor for the static counter
+	static double LocalMemoryUsage(){ return _LocalMemoryUsage; }	// read-only accessor for the static counter
+	static void ResetLocalMemoryUsage(){ _LocalMemoryUsage = 0; }	// only the local counter is reset; _MaxMemoryUsage is left untouched
+	static double MemoryUsage( void );	// declaration only
+	FEMTree( int blockSize );	// declaration only; NOTE(review): blockSize presumably configures nodeAllocator — confirm in the definition
+	FEMTree( FILE* fp , int blockSize );	// declaration only; NOTE(review): presumably the reading counterpart of write( FILE* ) — confirm in the definition
+	~FEMTree( void )
+	{
+		if( _tree ) for( int c=0 ; c<(1<<Dim) ; c++ ) _tree[c].cleanChildren( nodeAllocator );	// clean the children of each of the 1<<Dim nodes addressed through _tree
+		delete nodeAllocator;	// deleting a null pointer is a no-op, so no guard is needed
+	}
+	void write( FILE* fp ) const;
+	static void WriteParameter( FILE* fp )	// writes the header for this instantiation: a FEMTreeRealType tag for Real, then Dim as an int (raw in-memory bytes); any failure is reported through ERROR_OUT
+	{
+		FEMTreeRealType realType;
+		if     ( typeid( Real )==typeid( float  ) ) realType=FEM_TREE_REAL_FLOAT;
+		else if( typeid( Real )==typeid( double ) ) realType=FEM_TREE_REAL_DOUBLE;
+		else ERROR_OUT( "Unrecognized real type" );
+		if( fwrite( &realType , sizeof(FEMTreeRealType) , 1 , fp )!=1 ) ERROR_OUT( "Failed to write real type" );	// fwrite returns the number of items written
+		int dim = Dim;
+		if( fwrite( &dim , sizeof(int) , 1 , fp )!=1 ) ERROR_OUT( "Failed to write dimension" );
+	}
+	// thicken: two entry points (an explicit node list, or a predicate F), each with an asymmetric < LeftRadius , RightRadius > form and a symmetric < Radius > convenience form
+	template< unsigned int LeftRadius , unsigned int RightRadius , class ... DenseOrSparseNodeData > void thicken( FEMTreeNode** nodes , size_t nodeCount , DenseOrSparseNodeData* ... data );
+	template< unsigned int LeftRadius , unsigned int RightRadius , class IsThickenNode , class ... DenseOrSparseNodeData > void thicken( IsThickenNode F , DenseOrSparseNodeData* ... data );
+	template< unsigned int Radius , class ... DenseOrSparseNodeData > void thicken( FEMTreeNode** nodes , size_t nodeCount , DenseOrSparseNodeData* ... data ){ thicken< Radius , Radius >( nodes , nodeCount , data ... ); }	// symmetric form: forwards with LeftRadius==RightRadius==Radius
+	template< unsigned int Radius , class IsThickenNode , class ... DenseOrSparseNodeData > void thicken( IsThickenNode F , DenseOrSparseNodeData* ... data ){ thicken< Radius , Radius >( F , data ... ); }	// symmetric form: forwards with LeftRadius==RightRadius==Radius
+	template< unsigned int DensityDegree >	// returns a raw pointer; NOTE(review): ownership is not visible here — confirm who deletes it
+	typename FEMTree::template DensityEstimator< DensityDegree >* setDensityEstimator( const std::vector< PointSample >& samples , LocalDepth splatDepth , Real samplesPerNode , int coDimension );
+	template< unsigned int ... NormalSigs , unsigned int DensityDegree , class Data >	// one template header shared by both preprocessor branches; they differ only in the default BiasFunction's return literal (0.f vs (Real)0)
+#if defined(_WIN32) || defined(_WIN64)
+	SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > > setNormalField( UIntPack< NormalSigs ... > , const std::vector< PointSample >& samples , const std::vector< Data >& normalData , const DensityEstimator< DensityDegree >* density , Real& pointWeightSum , std::function< Real ( Real ) > BiasFunction = []( Real ){ return 0.f; } );
+#else // !_WIN32 && !_WIN64
+	SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > > setNormalField( UIntPack< NormalSigs ... > , const std::vector< PointSample >& samples , const std::vector< Data >& normalData , const DensityEstimator< DensityDegree >* density , Real& pointWeightSum , std::function< Real ( Real ) > BiasFunction = []( Real ){ return (Real)0; } );
+#endif // _WIN32 || _WIN64
+	// Data-field construction, multigrid finalization and dense-storage initialization (declarations only)
+	template< unsigned int DataSig , bool CreateNodes , unsigned int DensityDegree , class Data >	// takes sampleData by const reference and returns plain Data values
+	SparseNodeData< Data , IsotropicUIntPack< Dim , DataSig > > setSingleDepthDataField( const std::vector< PointSample >& samples , const std::vector< Data >& sampleData , const DensityEstimator< DensityDegree >* density );
+	template< unsigned int DataSig , bool CreateNodes , unsigned int DensityDegree , class Data >	// takes sampleData by non-const reference and returns ProjectiveData< Data , Real >; nearest defaults to false
+	SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > setDataField( const std::vector< PointSample >& samples , std::vector< Data >& sampleData , const DensityEstimator< DensityDegree >* density , bool nearest=false );
+	template< unsigned int MaxDegree , class HasDataFunctor , class ... DenseOrSparseNodeData > void finalizeForMultigrid( LocalDepth fullDepth , const HasDataFunctor F , DenseOrSparseNodeData* ... data );
+
+	template< unsigned int ... FEMSigs > DenseNodeData< Real , UIntPack< FEMSigs ... > > initDenseNodeData( UIntPack< FEMSigs ... > ) const;	// Real-valued overload
+	template< class Data , unsigned int ... FEMSigs > DenseNodeData< Data , UIntPack< FEMSigs ... > > initDenseNodeData( UIntPack< FEMSigs ... > ) const;	// overload with an explicit Data type
+
+	// Add multiple-dimensions -> one-dimension constraints: resolves the dynamic (sparse/dense) type of coefficients, then forwards to _addFEMConstraints
+	template< typename T , unsigned int ... FEMDegrees , unsigned int ... FEMSigs , unsigned int ... CDegrees , unsigned int ... CSigs , unsigned int CDim >
+	void addFEMConstraints( typename BaseFEMIntegrator::template Constraint< UIntPack< FEMDegrees ... > , UIntPack< CDegrees ... > , CDim >& F , const _SparseOrDenseNodeData< Point< T , CDim > , UIntPack< CSigs ... > >& coefficients , DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth ) const
+	{
+		using SparseCoefficients = SparseNodeData< Point< T , CDim > , UIntPack< CSigs ... > >;
+		using  DenseCoefficients =  DenseNodeData< Point< T , CDim > , UIntPack< CSigs ... > >;
+		static_assert( sizeof...( FEMDegrees )==Dim && sizeof...( FEMSigs )==Dim && sizeof...( CDegrees )==Dim && sizeof...( CSigs )==Dim  , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees ... >::template Compare< UIntPack< FEMSignature< FEMSigs >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		static_assert( UIntPack<   CDegrees ... >::template Compare< UIntPack< FEMSignature<   CSigs >::Degree ... > >::Equal , "[ERROR] Constraint signature and degrees don't match" );
+		if( typeid(coefficients)==typeid(SparseCoefficients) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F , static_cast< const SparseCoefficients& >( coefficients ) , constraints() , maxDepth );
+		if( typeid(coefficients)==typeid( DenseCoefficients) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F , static_cast< const  DenseCoefficients& >( coefficients ) , constraints() , maxDepth );
+		return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F , coefficients , constraints() , maxDepth );	// neither concrete type matched: forward the base-class reference unchanged
+	}
+	// Add one-dimensions -> one-dimension constraints (with distinct signatures): resolves the dynamic (sparse/dense) type of coefficients, then forwards to _addFEMConstraints
+	template< typename T , unsigned int ... FEMDegrees , unsigned int ... FEMSigs , unsigned int ... CDegrees , unsigned int ... CSigs >
+	void addFEMConstraints( typename BaseFEMIntegrator::template Constraint< UIntPack< FEMDegrees ... > , UIntPack< CDegrees ... > , 1 >& F , const _SparseOrDenseNodeData< T , UIntPack< CSigs ... > >& coefficients , DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth ) const
+	{
+		typedef SparseNodeData< T , UIntPack< CSigs ... > > SparseType;
+		typedef  DenseNodeData< T , UIntPack< CSigs ... > >  DenseType;
+		static_assert( sizeof...( FEMDegrees )==Dim && sizeof...( FEMSigs )==Dim && sizeof...( CDegrees )==Dim && sizeof...( CSigs )==Dim  , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees ... >::template Compare< UIntPack< FEMSignature< FEMSigs >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		static_assert( UIntPack<   CDegrees ... >::template Compare< UIntPack< FEMSignature<   CSigs >::Degree ... > >::Equal , "[ERROR] Constraint signature and degrees don't match" );
+		if     ( typeid(coefficients)==typeid(SparseType) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F , static_cast< const SparseType& >( coefficients ) , constraints() , maxDepth );
+		else if( typeid(coefficients)==typeid( DenseType) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F , static_cast< const  DenseType& >( coefficients ) , constraints() , maxDepth );
+		else                                                return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< CSigs ... >() , F ,                                   coefficients   , constraints() , maxDepth );	// neither concrete type matched: forward the base-class reference unchanged
+	}
+	// Add one-dimensions -> one-dimension constraints (with the same signatures): F is wrapped in a SystemConstraint so the shared _addFEMConstraints path can be used
+	template< typename T , unsigned int ... FEMDegrees , unsigned int ... FEMSigs >
+	// (coefficients may be sparse or dense; constraints is always a DenseNodeData)
+	void addFEMConstraints( typename BaseFEMIntegrator::template System< UIntPack< FEMDegrees ... > >& F , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs ... > >& coefficients , DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth ) const
+	{
+		using SparseCoefficients = SparseNodeData< T , UIntPack< FEMSigs ... > >;
+		using  DenseCoefficients =  DenseNodeData< T , UIntPack< FEMSigs ... > >;
+		static_assert( sizeof...( FEMDegrees )==Dim && sizeof...( FEMSigs )==Dim , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees ... >::template Compare< UIntPack< FEMSignature< FEMSigs >::Degree ... > >::Equal , "[ERROR] FEM signatures and degrees don't match" );
+		typename BaseFEMIntegrator::template SystemConstraint< UIntPack< FEMDegrees ... > > _F( F );
+		if( typeid(coefficients)==typeid(SparseCoefficients) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseCoefficients& >( coefficients ) , constraints() , maxDepth );
+		if( typeid(coefficients)==typeid( DenseCoefficients) ) return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseCoefficients& >( coefficients ) , constraints() , maxDepth );
+		return _addFEMConstraints< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , coefficients , constraints() , maxDepth );	// neither concrete type matched: forward the base-class reference unchanged
+	}
+
+	// Add interpolation constraints: the variadic overload (enabled only when at least one extra InterpolationInfo is passed) handles the first info and then the remaining ones
+	template< typename T , unsigned int ... FEMSigs , unsigned int PointD , unsigned int ... PointDs >
+	typename std::enable_if< (sizeof...(PointDs)!=0) >::type addInterpolationConstraints( DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth , const InterpolationInfo< T , PointD >& iInfo , const InterpolationInfo< T , PointDs >& ... iInfos ) const
+	{
+		addInterpolationConstraints< T , FEMSigs ... >( constraints , maxDepth , iInfo );	// first info: single-info overload (the variadic one is disabled for an empty PointDs pack)
+		addInterpolationConstraints< T , FEMSigs ... >( constraints , maxDepth , iInfos ... );	// remaining infos: selects this overload again while more than one is left
+	}
+	template< typename T , unsigned int ... FEMSigs , unsigned int PointD > void addInterpolationConstraints( DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxDepth , const InterpolationInfo< T , PointD >& interpolationInfo ) const;	// single-info overload: declaration only
+
+	// Real: forwards to _dot with a plain product as the pairing functor, after resolving the dynamic (sparse/dense) type of each coefficient set
+	template< unsigned int ... FEMDegrees1 , unsigned int ... FEMSigs1 , unsigned int ... FEMDegrees2 , unsigned int ... FEMSigs2 >
+	double dot( typename BaseFEMIntegrator::Constraint< UIntPack< FEMDegrees1 ... > , UIntPack< FEMDegrees2 ... > , 1 >& F , const _SparseOrDenseNodeData< Real , UIntPack< FEMSigs1 ... > >& coefficients1 , const _SparseOrDenseNodeData< Real , UIntPack< FEMSigs2 ... > >& coefficients2 ) const
+	{
+		typedef SparseNodeData< Real , UIntPack< FEMSigs1 ... > > SparseType1; typedef DenseNodeData< Real , UIntPack< FEMSigs1 ... > > DenseType1;
+		typedef SparseNodeData< Real , UIntPack< FEMSigs2 ... > > SparseType2; typedef DenseNodeData< Real , UIntPack< FEMSigs2 ... > > DenseType2;
+		static_assert( sizeof...( FEMDegrees1 )==Dim && sizeof...( FEMSigs1 )==Dim && sizeof...( FEMDegrees2 )==Dim && sizeof...( FEMSigs2 )==Dim  , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees1 ... >::template Compare< UIntPack< FEMSignature< FEMSigs1 >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		static_assert( UIntPack< FEMDegrees2 ... >::template Compare< UIntPack< FEMSignature< FEMSigs2 >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		auto product = []( Real v ,  Real w ){ return v*w; };	// the same pairing is used by every branch
+		// Try the four concrete (sparse/dense) combinations first, then fall back to the base-class references
+		if( typeid(coefficients1)==typeid(SparseType1) && typeid(coefficients2)==typeid(SparseType2) ) return _dot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const SparseType1& >( coefficients1 ) , static_cast< const SparseType2& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid(SparseType1) && typeid(coefficients2)==typeid( DenseType2) ) return _dot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const SparseType1& >( coefficients1 ) , static_cast< const  DenseType2& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid( DenseType1) && typeid(coefficients2)==typeid( DenseType2) ) return _dot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const  DenseType1& >( coefficients1 ) , static_cast< const  DenseType2& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid( DenseType1) && typeid(coefficients2)==typeid(SparseType2) ) return _dot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const  DenseType1& >( coefficients1 ) , static_cast< const SparseType2& >( coefficients2 ) , product );
+		return _dot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , coefficients1 , coefficients2 , product );
+	}
+	template< unsigned int ... FEMDegrees , unsigned int ... FEMSigs >	// System overload: F is wrapped in a SystemConstraint and both coefficient sets share the same signatures
+	double dot( typename BaseFEMIntegrator::System< UIntPack< FEMDegrees ... > >& F , const _SparseOrDenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients1 , const _SparseOrDenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients2 ) const
+	{
+		typedef SparseNodeData< Real , UIntPack< FEMSigs ... > > SparseType; typedef DenseNodeData< Real , UIntPack< FEMSigs ... > > DenseType;
+		static_assert( sizeof...( FEMDegrees )==Dim && sizeof...( FEMSigs )==Dim , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees ... >::template Compare< UIntPack< FEMSignature< FEMSigs >::Degree ... > >::Equal , "[ERROR] FEM signatures and degrees don't match" );
+		typename BaseFEMIntegrator::template SystemConstraint< UIntPack< FEMDegrees ... > > _F( F );
+		auto product = []( Real v ,  Real w ){ return v*w; };	// the same pairing is used by every branch
+		if( typeid(coefficients1)==typeid(SparseType) && typeid(coefficients2)==typeid(SparseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients1 ) , static_cast< const SparseType& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid(SparseType) && typeid(coefficients2)==typeid( DenseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients1 ) , static_cast< const  DenseType& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid( DenseType) && typeid(coefficients2)==typeid( DenseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients1 ) , static_cast< const  DenseType& >( coefficients2 ) , product );
+		if( typeid(coefficients1)==typeid( DenseType) && typeid(coefficients2)==typeid(SparseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients1 ) , static_cast< const SparseType& >( coefficients2 ) , product );
+		return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , coefficients1 , coefficients2 , product );	// no concrete combination matched: forward the base-class references
+	}
+	// Squared norm of a Real-valued coefficient set: the same dispatch as dot(), with `coefficients` passed to _dot as both arguments
+	template< unsigned int ... FEMDegrees , unsigned int ... FEMSigs > double squareNorm( typename BaseFEMIntegrator::template System< UIntPack< FEMDegrees ... > >& F , const _SparseOrDenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients ) const
+	{
+		using SparseType = SparseNodeData< Real , UIntPack< FEMSigs ... > >;
+		using  DenseType =  DenseNodeData< Real , UIntPack< FEMSigs ... > >;
+		typename BaseFEMIntegrator::template SystemConstraint< UIntPack< FEMDegrees ... > > _F( F );
+		if( typeid(coefficients)==typeid(SparseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients ) , static_cast< const SparseType& >( coefficients ) , []( Real v ,  Real w ){ return v*w; } );
+		if( typeid(coefficients)==typeid( DenseType) ) return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients ) , static_cast< const  DenseType& >( coefficients ) , []( Real v ,  Real w ){ return v*w; } );
+		return _dot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , coefficients , coefficients , []( Real v ,  Real w ){ return v*w; } );
+	}
+
+	// Real-valued interpolation dot product: forwards to _interpolationDot< Real > with ordinary multiplication as the product (the callee name was previously misspelled, so this overload could never be instantiated)
+	template< unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , unsigned int ... PointDs > double interpolationDot( const DenseNodeData< Real , UIntPack< FEMSigs1 ... > >& coefficients1 , const DenseNodeData< Real , UIntPack< FEMSigs2 ... > >& coefficients2 , const InterpolationInfo< Real , PointDs >* ... iInfos ) const
+	{
+		static_assert( sizeof...( FEMSigs1 )==Dim && sizeof...( FEMSigs2 )==Dim , "[ERROR] Dimensions don't match" );
+		return _interpolationDot< Real >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , coefficients1 , coefficients2 , []( Real v ,  Real w ){ return v*w; } , iInfos... );
+	}
+	// Real-valued interpolation square norm: _interpolationDot< Real > of `coefficients` with itself, using ordinary multiplication as the product
+	template< unsigned int ... FEMSigs , unsigned int ... PointDs > double interpolationSquareNorm( const DenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients , const InterpolationInfo< Real , PointDs >* ... iInfos ) const
+	{
+		static_assert( sizeof...( FEMSigs )==Dim , "[ERROR] Dimensions don't match" );
+		return _interpolationDot< Real >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , coefficients , coefficients , []( Real v ,  Real w ){ return v*w; } , iInfos... );
+	}
+
+	// Generic
+	// Generic dot product under the constraint F: forwards to _dot< T > with the caller-supplied product Dot, down-casting each argument when typeid reports a concrete sparse/dense type
+	template< typename T , typename TDotT , unsigned int ... FEMDegrees1 , unsigned int ... FEMSigs1 , unsigned int ... FEMDegrees2 , unsigned int ... FEMSigs2 > double dot( TDotT Dot , typename BaseFEMIntegrator::Constraint< UIntPack< FEMDegrees1 ... > , UIntPack< FEMDegrees2 ... > , 1 >& F , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs1 ... > >& coefficients1 , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs2 ... > >& coefficients2 ) const
+	{
+		using SparseType1 = SparseNodeData< T , UIntPack< FEMSigs1 ... > > ; using DenseType1 = DenseNodeData< T , UIntPack< FEMSigs1 ... > >;
+		using SparseType2 = SparseNodeData< T , UIntPack< FEMSigs2 ... > > ; using DenseType2 = DenseNodeData< T , UIntPack< FEMSigs2 ... > >;
+		static_assert( sizeof...( FEMDegrees1 )==Dim && sizeof...( FEMSigs1 )==Dim && sizeof...( FEMDegrees2 )==Dim && sizeof...( FEMSigs2 )==Dim  , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees1 ... >::template Compare< UIntPack< FEMSignature< FEMSigs1 >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		static_assert( UIntPack< FEMDegrees2 ... >::template Compare< UIntPack< FEMSignature< FEMSigs2 >::Degree ... > >::Equal , "[ERROR] FEM signature and degrees don't match" );
+		const bool sparse1 = typeid(coefficients1)==typeid(SparseType1) , dense1 = typeid(coefficients1)==typeid(DenseType1);
+		const bool sparse2 = typeid(coefficients2)==typeid(SparseType2) , dense2 = typeid(coefficients2)==typeid(DenseType2);
+		if( sparse1 && sparse2 ) return _dot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const SparseType1& >( coefficients1 ) , static_cast< const SparseType2& >( coefficients2 ) , Dot );
+		if( sparse1 &&  dense2 ) return _dot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const SparseType1& >( coefficients1 ) , static_cast< const  DenseType2& >( coefficients2 ) , Dot );
+		if(  dense1 &&  dense2 ) return _dot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const  DenseType1& >( coefficients1 ) , static_cast< const  DenseType2& >( coefficients2 ) , Dot );
+		if(  dense1 && sparse2 ) return _dot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , static_cast< const  DenseType1& >( coefficients1 ) , static_cast< const SparseType2& >( coefficients2 ) , Dot );
+		return _dot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , F , coefficients1 , coefficients2 , Dot );	// neither concrete type recognised: pass the base references through
+	}
+	// Generic dot product under the system F: wraps F in a SystemConstraint and forwards to _dot< T > with the caller-supplied product Dot, down-casting each argument when typeid reports a concrete sparse/dense type
+	template< typename T , typename TDotT , unsigned int ... FEMDegrees , unsigned int ... FEMSigs > double dot( TDotT Dot , typename BaseFEMIntegrator::System< UIntPack< FEMDegrees ... > >& F , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs ... > >& coefficients1 , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs ... > >& coefficients2 ) const
+	{
+		using SparseType = SparseNodeData< T , UIntPack< FEMSigs ... > > ; using DenseType = DenseNodeData< T , UIntPack< FEMSigs ... > >;
+		static_assert( sizeof...( FEMDegrees )==Dim && sizeof...( FEMSigs )==Dim , "[ERROR] Dimensions don't match" );
+		static_assert( UIntPack< FEMDegrees ... >::template Compare< UIntPack< FEMSignature< FEMSigs >::Degree ... > >::Equal , "[ERROR] FEM signatures and degrees don't match" );
+		const bool sparse1 = typeid(coefficients1)==typeid(SparseType) , dense1 = typeid(coefficients1)==typeid(DenseType) , sparse2 = typeid(coefficients2)==typeid(SparseType) , dense2 = typeid(coefficients2)==typeid(DenseType);
+		typename BaseFEMIntegrator::template SystemConstraint< UIntPack< FEMDegrees ... > > _F( F );
+		if( sparse1 && sparse2 ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients1 ) , static_cast< const SparseType& >( coefficients2 ) , Dot );
+		if( sparse1 &&  dense2 ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients1 ) , static_cast< const  DenseType& >( coefficients2 ) , Dot );
+		if(  dense1 &&  dense2 ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients1 ) , static_cast< const  DenseType& >( coefficients2 ) , Dot );
+		if(  dense1 && sparse2 ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients1 ) , static_cast< const SparseType& >( coefficients2 ) , Dot );
+		return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , coefficients1 , coefficients2 , Dot );	// neither concrete type recognised: pass the base references through
+	}
+
+	// Generic squared norm: the same dispatch as the generic dot(), with `coefficients` passed to _dot< T > as both arguments and Dot as the product
+	template< typename T , typename TDotT , unsigned int ... FEMDegrees , unsigned int ... FEMSigs > double squareNorm( TDotT Dot , typename BaseFEMIntegrator::template System< UIntPack< FEMDegrees ... > >& F , const _SparseOrDenseNodeData< T , UIntPack< FEMSigs ... > >& coefficients ) const
+	{
+		using SparseType = SparseNodeData< T , UIntPack< FEMSigs ... > >;
+		using  DenseType =  DenseNodeData< T , UIntPack< FEMSigs ... > >;
+		typename BaseFEMIntegrator::template SystemConstraint< UIntPack< FEMDegrees ... > > _F( F );
+		if( typeid(coefficients)==typeid(SparseType) ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const SparseType& >( coefficients ) , static_cast< const SparseType& >( coefficients ) , Dot );
+		if( typeid(coefficients)==typeid( DenseType) ) return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , static_cast< const  DenseType& >( coefficients ) , static_cast< const  DenseType& >( coefficients ) , Dot );
+		return _dot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , _F , coefficients , coefficients , Dot );
+	}
+
+	// Generic interpolation dot product: checks the signature-pack sizes against Dim, then forwards to _interpolationDot< T > with the caller-supplied product Dot
+	template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , unsigned int ... PointDs > double interpolationDot( TDotT Dot , const DenseNodeData< T , UIntPack< FEMSigs1 ... > >& coefficients1 , const DenseNodeData< T , UIntPack< FEMSigs2 ... > >& coefficients2 , const InterpolationInfo< T , PointDs >* ... iInfos ) const
+	{
+		static_assert( sizeof...( FEMSigs1 )==Dim && sizeof...( FEMSigs2 )==Dim , "[ERROR] Dimensions don't match" );
+		return _interpolationDot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , coefficients1 , coefficients2 , Dot , iInfos... );
+	}
+	// Generic interpolation square norm: _interpolationDot< T > of `coefficients` with itself, using the caller-supplied product Dot
+	template< typename T , typename TDotT , unsigned int ... FEMSigs , unsigned int ... PointDs > double interpolationSquareNorm( TDotT Dot , const DenseNodeData< T , UIntPack< FEMSigs ... > >& coefficients , const InterpolationInfo< T , PointDs >* ... iInfos ) const
+	{
+		static_assert( sizeof...( FEMSigs )==Dim , "[ERROR] Dimensions don't match" );
+		return _interpolationDot< T >( UIntPack< FEMSigs ... >() , UIntPack< FEMSigs ... >() , coefficients , coefficients , Dot , iInfos... );
+	}
+
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	SparseMatrix< Real , int > systemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth depth , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;	// Declaration only; defined outside this span. NOTE(review): presumably the matrix of system F at `depth`, including the interpolation terms -- confirm against the definition
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	SparseMatrix< Real , int > prolongedSystemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth highDepth , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;	// Declaration only. NOTE(review): relation between `highDepth` and the returned matrix is not visible here -- confirm
+	template< unsigned int ... FEMSigs >
+	SparseMatrix< Real , int > downSampleMatrix( UIntPack< FEMSigs ... > , LocalDepth highDepth ) const;	// Declaration only. NOTE(review): presumably maps data at `highDepth` to the next coarser depth -- confirm
+
+	template< typename T , unsigned int ... PointDs , unsigned int ... FEMSigs >
+	SparseMatrix< Real , int > fullSystemMatrix( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , LocalDepth depth , bool nonRefinableOnly , const InterpolationInfo< T , PointDs >* ... interpolationInfo ) const;	// Declaration only. NOTE(review): effect of `nonRefinableOnly` is not visible here -- confirm
+
+	struct SolverInfo	// Options taken by solveSystem: how to solve, and what diagnostics to output
+	{
+	protected:
+		struct _IterFunction	// An iteration count given either as a constant or as a callback of (depth), (restriction,depth) or (vCycle,restriction,depth)
+		{
+			_IterFunction( int i ) : _i0(i) , _type(0) {}
+			_IterFunction( std::function< int (              int ) > iFunction ) : _i0(0) , _i1(iFunction) , _type(1) {}	// _i0 is zeroed so that copying the object never reads an indeterminate int
+			_IterFunction( std::function< int (       bool , int ) > iFunction ) : _i0(0) , _i2(iFunction) , _type(2) {}
+			_IterFunction( std::function< int ( int , bool , int ) > iFunction ) : _i0(0) , _i3(iFunction) , _type(3) {}
+			_IterFunction& operator = ( int i ){ *this = _IterFunction(i) ; return *this; }
+			_IterFunction& operator = ( std::function< int (              int ) > iFunction ){ *this = _IterFunction(iFunction) ; return *this; }
+			_IterFunction& operator = ( std::function< int (       bool , int ) > iFunction ){ *this = _IterFunction(iFunction) ; return *this; }
+			_IterFunction& operator = ( std::function< int ( int , bool , int ) > iFunction ){ *this = _IterFunction(iFunction) ; return *this; }
+			// Returns the stored constant, or calls the stored callback with the subset of arguments it accepts; 0 for an unknown _type
+			int operator()( int vCycle , bool restriction , int depth ) const
+			{
+				switch( _type )
+				{
+				case 0: return _i0;
+				case 1: return _i1( depth );
+				case 2: return _i2( restriction , depth );
+				case 3: return _i3( vCycle , restriction , depth );
+				default: return 0;
+				}
+			}
+		protected:
+			int _i0;
+			std::function< int ( int ) > _i1;
+			std::function< int ( bool , int ) > _i2;
+			std::function< int ( int , bool , int ) > _i3;
+			int _type;	// selects which of _i0.._i3 is active (0..3)
+		};
+	public:
+		// How to solve
+		bool wCycle;
+		LocalDepth cgDepth;
+		bool cascadic;
+		unsigned int sliceBlockSize;
+		bool useSupportWeights , useProlongationSupportWeights;
+		std::function< Real ( Real , Real ) > sorRestrictionFunction;
+		std::function< Real ( Real , Real ) > sorProlongationFunction;
+		_IterFunction iters;
+		int vCycles;
+		double cgAccuracy;
+		int baseDepth , baseVCycles;
+		// What to output
+		bool verbose , showResidual;
+		int showGlobalResidual;
+		// Defaults; the initializers are listed in member-declaration order, which is the order in which they actually run
+		SolverInfo( void ) : wCycle(false) , cgDepth(0) , cascadic(true) , sliceBlockSize(1) , useSupportWeights( false ) , useProlongationSupportWeights( false ) , sorRestrictionFunction( []( Real , Real ){ return (Real)1; } ) , sorProlongationFunction( []( Real , Real ){ return (Real)1; } ) , iters(1) , vCycles(1) , cgAccuracy(0.) , baseDepth(0) , baseVCycles(1) , verbose(false) , showResidual(false) , showGlobalResidual(SHOW_GLOBAL_RESIDUAL_NONE) { }
+	};
+	// Solve the linear system (generic overloads taking an explicit product functor Dot; declared here, defined outside this span)
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+	void solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , DenseNodeData< T , UIntPack< FEMSigs ... > >& solution , TDotT Dot , LocalDepth maxSolveDepth , const SolverInfo& solverInfo , InterpolationInfo< T , PointDs >* ... iData ) const;	// takes `solution` by non-const reference; NOTE(review): presumably used as the initial guess and overwritten -- confirm
+	template< unsigned int ... FEMSigs , typename T , typename TDotT , unsigned int ... PointDs >
+	DenseNodeData< T , UIntPack< FEMSigs ... > > solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< T , UIntPack< FEMSigs ... > >& constraints , TDotT Dot , LocalDepth maxSolveDepth , const SolverInfo& solverInfo , InterpolationInfo< T , PointDs >* ... iData ) const;	// same inputs without `solution`; the result is returned by value
+
+	// Real-valued in-place solve: forwards to the generic solveSystem with ordinary multiplication as Dot. Template arguments are deduced (as in the value-returning overload): a `Real` listed after `FEMSigs ...` would be matched against the non-type pack, not against T
+	template< unsigned int ... FEMSigs , unsigned int ... PointDs > void solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< Real , UIntPack< FEMSigs ... > >& constraints , DenseNodeData< Real , UIntPack< FEMSigs ... > >& solution , LocalDepth maxSolveDepth , const SolverInfo& solverInfo , InterpolationInfo< Real , PointDs >* ... iData ) const
+	{
+		solveSystem( UIntPack< FEMSigs ... >() , F , constraints , solution , []( Real v , Real w ){ return v*w; } , maxSolveDepth , solverInfo , iData ... );
+	}
+	// Real-valued solve returning the solution: forwards to the generic value-returning solveSystem with ordinary multiplication as Dot
+	template< unsigned int ... FEMSigs , unsigned int ... PointDs > DenseNodeData< Real , UIntPack< FEMSigs ... > > solveSystem( UIntPack< FEMSigs ... > , typename BaseFEMIntegrator::template System< UIntPack< FEMSignature< FEMSigs >::Degree ... > >& F , const DenseNodeData< Real , UIntPack< FEMSigs ... > >& constraints , LocalDepth maxSolveDepth , const SolverInfo& solverInfo , InterpolationInfo< Real , PointDs >* ... iData ) const
+	{
+		return solveSystem( UIntPack< FEMSigs ... >() , F , constraints , []( Real v , Real w ){ return v*w; } , maxSolveDepth , solverInfo , iData ... );
+	}
+
+	FEMTreeNode& spaceRoot( void ){ return *_spaceRoot; }	// mutable reference to the node _spaceRoot points at
+	const FEMTreeNode& tree( void ) const { return *_tree; }	// read-only reference to the node _tree points at
+	std::function< void ( FEMTreeNode& ) > initializer( void ){ return _NodeInitializer( *this ); }	// node-initialization callback built from this tree
+	size_t leaves( void ) const { return _tree->leaves(); }	// leaf count as reported by _tree
+	size_t nodes( void ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( IsActiveNode< Dim >( n ) ) count++ ; return count; }	// number of nodes for which IsActiveNode holds (the counter is size_t to match the return type)
+	size_t ghostNodes( void ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( !IsActiveNode< Dim >( n ) ) count++ ; return count; }	// number of nodes for which IsActiveNode does not hold
+	inline size_t validSpaceNodes( void ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( isValidSpaceNode( n ) ) count++ ; return count; }	// number of nodes passing isValidSpaceNode
+	inline size_t validSpaceNodes( LocalDepth d ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( _localDepth(n)==d && isValidSpaceNode( n ) ) count++ ; return count; }	// as above, restricted to nodes at local depth d
+	template< unsigned int ... FEMSigs > size_t validFEMNodes( UIntPack< FEMSigs ... > ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( isValidFEMNode( UIntPack< FEMSigs ... >() , n ) ) count++ ; return count; }	// number of nodes passing isValidFEMNode for the given signatures
+	template< unsigned int ... FEMSigs > size_t validFEMNodes( UIntPack< FEMSigs ... > , LocalDepth d ) const { size_t count = 0 ; for( const FEMTreeNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( _localDepth(n)==d && isValidFEMNode( UIntPack< FEMSigs ... >() , n ) ) count++ ; return count; }	// as above, restricted to nodes at local depth d
+	LocalDepth depth( void ) const { return _spaceRoot->maxDepth(); }	// maximum depth as reported by the node _spaceRoot points at
+	void resetNodeIndices( void ){ _nodeCount = 0 ; for( FEMTreeNode* cur=_tree->nextNode() ; cur ; cur=_tree->nextNode( cur ) ){ _nodeInitializer( *cur ) ; cur->nodeData.flags = 0; } }	// zero the node count, then re-run the initializer on every node and clear its flags
+
+	std::vector< int > merge( FEMTree* tree );	// Declaration only; defined outside this span. NOTE(review): the meaning of the returned indices is not visible here -- confirm against the definition
+protected:
+	template< class Real1 , unsigned int _Dim > static bool _IsZero( Point< Real1 , _Dim > p );	// Declarations only. NOTE(review): presumably a test for an all-zero point / zero scalar -- confirm against the definitions
+	template< class Real1 >                     static bool _IsZero( Real1 p );
+	template< class SReal , class Data , unsigned int _Dim > static Data _StencilDot( Point< SReal , _Dim > p1 , Point< Data , _Dim > p2 );	// Declarations only: one general Point/Point overload, plus overloads for every mix of 1-dimensional Point and bare scalar arguments
+	template< class SReal , class Data >                     static Data _StencilDot( Point< SReal , 1 >    p1 , Point< Data , 1 >    p2 );
+	template< class SReal , class Data >                     static Data _StencilDot( SReal                 p1 , Point< Data , 1 >    p2 );
+	template< class SReal , class Data >                     static Data _StencilDot( Point< SReal , 1 >    p1 , Data                 p2 );
+	template< class SReal , class Data >                     static Data _StencilDot( SReal                 p1 , Data                 p2 );
+	
+	// We need the signatures to test if nodes are valid
+	template< typename T , unsigned int ... FEMSigs , unsigned int ... CSigs , unsigned int ... FEMDegrees , unsigned int ... CDegrees , unsigned int CDim , class Coefficients >
+	void _addFEMConstraints( UIntPack< FEMSigs ... > , UIntPack< CSigs ... > , typename BaseFEMIntegrator::Constraint< UIntPack< FEMDegrees ... > , UIntPack< CDegrees ... > , CDim >& F , const Coefficients& coefficients , Pointer( T ) constraints , LocalDepth maxDepth ) const;	// Declaration only; defined outside this span
+	template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , unsigned int ... Degrees1 , unsigned int ... Degrees2 , class Coefficients1 , class Coefficients2 >
+	double _dot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , typename BaseFEMIntegrator::Constraint< UIntPack< Degrees1 ... > , UIntPack< Degrees2 ... > , 1 >& F , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot ) const;	// Worker that the public dot() / squareNorm() overloads forward to; declaration only
+	template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , class Coefficients1 , class Coefficients2 , unsigned int PointD >
+	double _interpolationDot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot , const InterpolationInfo< T , PointD >* iInfo ) const;	// Single-InterpolationInfo worker (declaration only); the variadic overload sums it over all infos
+	// Variadic case: the contribution of the first interpolation info plus the (recursive) contribution of the remaining ones
+	template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , class Coefficients1 , class Coefficients2 , unsigned int PointD , unsigned int ... PointDs > double _interpolationDot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot , const InterpolationInfo< T , PointD >* iInfo , const InterpolationInfo< T , PointDs >* ... iInfos ) const
+	{
+		return _interpolationDot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , coefficients1 , coefficients2 , Dot , iInfo ) + _interpolationDot< T >( UIntPack< FEMSigs1 ... >() , UIntPack< FEMSigs2 ... >() , coefficients1 , coefficients2 , Dot , iInfos... );
+	}
+	template< typename T , typename TDotT , unsigned int ... FEMSigs1 , unsigned int ... FEMSigs2 , class Coefficients1 , class Coefficients2 > double _interpolationDot( UIntPack< FEMSigs1 ... > , UIntPack< FEMSigs2 ... > , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 , TDotT Dot ) const { return 0; }	// base case: no interpolation infos, so nothing to add
+};
+template< unsigned int Dim , class Real > double FEMTree< Dim , Real >::_MaxMemoryUsage = 0;	// out-of-class definitions of the two static memory-usage members, both starting at 0
+template< unsigned int Dim , class Real > double FEMTree< Dim , Real >::_LocalMemoryUsage = 0;
+
+
+template< unsigned int Dim , class Real , class Vertex >
+struct IsoSurfaceExtractor	// Unspecialized fallback: Extract only issues a warning and returns default-constructed stats
+{
+	struct IsoStats
+	{
+		std::string toString( void ) const { return std::string( "Iso-surface extraction not supported for dimension " ) + std::to_string( Dim ); }	// concatenated explicitly: std::string( const char* , count ) would keep only the first Dim characters and never expands %d
+	};
+	template< typename Data , unsigned int ... FEMSigs , unsigned int WeightDegree , unsigned int DataSig >
+	static IsoStats Extract
+	(
+		UIntPack< FEMSigs ... > , UIntPack< WeightDegree > , UIntPack< DataSig > ,							// Dummy variables for grouping the parameter
+		const FEMTree< Dim , Real >& tree ,																	// The tree over which the system is discretized
+		const typename FEMTree< Dim , Real >::template DensityEstimator< WeightDegree >* densityWeights ,	// Density weights
+		const SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > >* data ,	// Auxiliary spatial data
+		const DenseNodeData< Real , UIntPack< FEMSigs ... > >& coefficients ,								// The coefficients of the function
+		Real isoValue ,																						// The value at which to extract the level-set
+		CoredMeshData< Vertex >& mesh ,																		// The mesh in which to store the output
+		std::function< void ( Vertex& , Point< Real , Dim > , Real , Data ) > SetVertex ,					// A function for setting the depth and data of a vertex
+		bool nonLinearFit ,																					// Should a linear interpolant be used
+		bool addBarycenter ,																				// Should we triangulate polygons by adding a mid-point
+		bool polygonMesh ,																					// Should we output triangles or polygons
+		bool flipOrientation																				// Should we flip the orientation
+	)
+	{
+		// The unspecialized implementation is not supported
+		WARN( "Iso-surface extraction not supported for dimension %d" , Dim );
+		return IsoStats();
+	}
+};
+
+template< unsigned int Dim , class Real >
+struct FEMTreeInitializer	// Static-only collection of Initialize overloads that populate a tree rooted at `root` from different kinds of input (declarations only; definitions are outside this span)
+{
+	typedef RegularTreeNode< Dim , FEMTreeNodeData > FEMTreeNode;
+	typedef NodeAndPointSample< Dim , Real > PointSample;
+
+	template< class Data >
+	struct DerivativeStream	// Abstract input interface consumed by the DerivativeStream overload of Initialize
+	{
+		virtual void resolution( unsigned int res[] ) const = 0;	// NOTE(review): presumably writes the per-axis resolution into res -- confirm the expected array length
+		virtual bool nextDerivative( unsigned int idx[] , unsigned int& dir , Data& dValue ) = 0;	// NOTE(review): presumably returns false once the stream is exhausted -- confirm
+	};
+
+	// Initialize the tree using a refinement avatar
+	static int Initialize( FEMTreeNode& root , int maxDepth , std::function< bool ( int , int[] ) > Refine , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+
+	// Initialize the tree using a point stream
+	static int Initialize( FEMTreeNode& root , InputPointStream< Real , Dim >& pointStream , int maxDepth , std::vector< PointSample >& samplePoints , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+	template< class Data > static int Initialize( FEMTreeNode& root , InputPointStreamWithData< Real , Dim , Data >& pointStream , int maxDepth , std::vector< PointSample >& samplePoints , std::vector< Data >& sampleData , bool mergeNodeSamples , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer , std::function< Real ( const Point< Real , Dim >& , Data& ) > ProcessData = []( const Point< Real , Dim >& , Data& ){ return (Real)1.; } );
+
+	// Initialize the tree using simplices
+	static void Initialize( FEMTreeNode& root , const std::vector< Point< Real , Dim > >& vertices , const std::vector< SimplexIndex< Dim-1 > >& simplices , int maxDepth , std::vector< PointSample >& samples , bool mergeNodeSamples , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+	static void Initialize( FEMTreeNode& root , const std::vector< Point< Real , Dim > >& vertices , const std::vector< SimplexIndex< Dim-1 > >& simplices , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& nodeSimplices , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+	template< class Data , class _Data , bool Dual=true >	// NOTE(review): unlike the two overloads above, this one takes value and label arrays plus a per-axis resolution rather than simplices
+	static int Initialize( FEMTreeNode& root , ConstPointer( Data ) values , ConstPointer( int ) labels , int resolution[Dim] , std::vector< NodeSample< Dim , _Data > > derivatives[Dim] , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer , std::function< _Data ( const Data& ) > DataConverter = []( const Data& d ){ return (_Data)d; }	);
+
+	template< bool Dual , class Data >
+	static unsigned int Initialize( FEMTreeNode& root , DerivativeStream< Data >& dStream , std::vector< NodeSample< Dim , Data > > derivatives[Dim] , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+
+protected:
+	static int _AddSimplex( FEMTreeNode& root , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< PointSample >& samples , std::vector< int >* nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );	// helpers in two flavours (root by reference / node by pointer), each with a PointSample and a NodeSimplices output variant
+	static int _AddSimplex( FEMTreeNode& root , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& simplices , std::vector< int >& nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+	static int _AddSimplex( FEMTreeNode* node , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< PointSample >& samples , std::vector< int >* nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+	static int _AddSimplex( FEMTreeNode* node , Simplex< Real , Dim , Dim-1 >& s , int maxDepth , std::vector< NodeSimplices< Dim , Real > >& simplices , std::vector< int >& nodeToIndexMap , Allocator< FEMTreeNode >* nodeAllocator , std::function< void ( FEMTreeNode& ) > NodeInitializer );
+};
+template< unsigned int Dim , class Real >
+template< unsigned int ... SupportSizes >
+const unsigned int FEMTree< Dim , Real >::CornerLoopData< SupportSizes ... >::supportSizes[] = { SupportSizes ... };	// out-of-class definition of the static array: the SupportSizes pack, expanded in order
+
+#include "FEMTree.inl"
+#include "FEMTree.SortedTreeNodes.inl"
+#include "FEMTree.WeightedSamples.inl"
+#include "FEMTree.System.inl"
+#include "FEMTree.Evaluation.inl"
+#include "FEMTree.IsoSurface.specialized.inl"
+#include "FEMTree.Initialize.inl"
+#endif // FEM_TREE_INCLUDED
diff --git a/Src/FEMTree.inl b/Src/FEMTree.inl
new file mode 100644
index 0000000..b07a188
--- /dev/null
+++ b/Src/FEMTree.inl
@@ -0,0 +1,1047 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#include <functional>
+#include <cmath>
+#include <climits>
+#include "MyMiscellany.h"
+
+/////////////////////
+// FEMTreeNodeData //
+/////////////////////
+inline FEMTreeNodeData::FEMTreeNodeData( void ){ flags = 0; } // Starts with all flag bits cleared. Marked inline because this file is #included from the header, so a plain definition would be emitted by every translation unit that includes it
+inline FEMTreeNodeData::~FEMTreeNodeData( void ) { } // Nothing to release; inline for the same reason as the constructor
+
+
+/////////////
+// FEMTree //
+/////////////
+template< unsigned int Dim , class Real >
+double FEMTree< Dim , Real >::MemoryUsage( void ) // Returns MemoryInfo::Usage() divided by 2^20 and raises both running peaks to at least that value
+{
+	const double usage = static_cast< double >( MemoryInfo::Usage() ) / (1<<20);
+	if( !( usage<_MaxMemoryUsage   ) ) _MaxMemoryUsage   = usage;
+	if( !( usage<_LocalMemoryUsage ) ) _LocalMemoryUsage = usage;
+	return usage;
+}
+
+template< unsigned int Dim , class Real > FEMTree< Dim , Real >::FEMTree( int blockSize )
+{
+	// A block allocator is only created when a positive block size is requested
+	nodeAllocator = blockSize>0 ? new Allocator< FEMTreeNode >() : NULL;
+	if( nodeAllocator ) nodeAllocator->set( blockSize );
+	// Create the root brood, give it children, and make the first child the space root at depth 0 / offset 0
+	_nodeCount = 0;
+	_tree = FEMTreeNode::NewBrood( nodeAllocator , _NodeInitializer( *this ) );
+	_tree->initChildren( nodeAllocator , _NodeInitializer( *this ) );
+	_spaceRoot = _tree->children;
+	int origin[Dim];
+	for( int dim=0 ; dim<Dim ; dim++ ) origin[dim] = 0;
+	RegularTreeNode< Dim , FEMTreeNodeData >::ResetDepthAndOffset( _spaceRoot , 0 , origin );
+	_depthOffset = 0;
+	// Fill the cached signature arrays with 0xFF bytes
+	memset( _femSigs1 , -1 , sizeof( _femSigs1 ) );
+	memset( _femSigs2 , -1 , sizeof( _femSigs2 ) );
+	memset( _refinableSigs , -1 , sizeof( _refinableSigs ) );
+}
+template< unsigned int Dim , class Real >
+FEMTree< Dim , Real >::FEMTree( FILE* fp , int blockSize )
+{
+	// Build the tree from a stream in the layout produced by write() or, when fp is NULL, build the same empty tree as FEMTree( int )
+	nodeAllocator = blockSize>0 ? new Allocator< FEMTreeNode >() : NULL;
+	if( nodeAllocator ) nodeAllocator->set( blockSize );
+	// As in FEMTree( int ): zero the node counter before any node is created and invalidate the cached signatures the _set*Flags methods memcmp against (this constructor used to set neither)
+	_nodeCount = 0;
+	memset( _femSigs1 , -1 , sizeof( _femSigs1 ) );
+	memset( _femSigs2 , -1 , sizeof( _femSigs2 ) );
+	memset( _refinableSigs , -1 , sizeof( _refinableSigs ) );
+	if( fp )
+	{
+		if( fread( &_depthOffset , sizeof( int ) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read depth offset" );
+		_tree = FEMTreeNode::NewBrood( nodeAllocator , _NodeInitializer( *this ) );
+		_tree->read( fp , nodeAllocator , _NodeInitializer( *this ) );
+		_maxDepth = _tree->maxDepth() - _depthOffset;
+		_spaceRoot = _tree->children;
+		if( _depthOffset>1 )
+		{
+			_spaceRoot = _tree->children + (1<<Dim)-1; // start at the last child of the root, then follow first children for the remaining levels
+			for( int d=1 ; d<_depthOffset ; d++ )
+				if( !_spaceRoot->children ) ERROR_OUT( "Expected children" );
+				else _spaceRoot = _spaceRoot->children;
+		}
+		_sNodes.set( *_tree , NULL );
+	}
+	else
+	{
+		_tree = FEMTreeNode::NewBrood( nodeAllocator , _NodeInitializer( *this ) );
+		_tree->initChildren( nodeAllocator , _NodeInitializer( *this ) ) , _spaceRoot = _tree->children;
+		int offset[Dim];
+		for( int d=0 ; d<Dim ; d++ ) offset[d] = 0;
+		RegularTreeNode< Dim , FEMTreeNodeData >::ResetDepthAndOffset( _spaceRoot , 0 , offset );
+		_depthOffset = 0;
+	}
+}
+template< unsigned int Dim , class Real > void FEMTree< Dim , Real >::write( FILE* fp ) const
+{ // Writes _depthOffset as one raw int followed by the node hierarchy, i.e. the layout FEMTree( FILE* , int ) reads back
+	if( fwrite( &_depthOffset , sizeof( int ) , 1 , fp )!=1 ) ERROR_OUT( "Failed to write depth offset" ); // a short write used to be ignored; report it the same way the matching fread failure is reported
+	_tree->write( fp );
+}
+
+template< unsigned int Dim , class Real >
+const RegularTreeNode< Dim , FEMTreeNodeData >* FEMTree< Dim , Real >::leaf( Point< Real , Dim > p ) const // Childless descendant of the space root reached by following p, or NULL when p fails _InBounds
+{
+	if( !_InBounds( p ) ) return NULL;
+	Point< Real , Dim > cellCenter;
+	for( int dim=0 ; dim<Dim ; dim++ ) cellCenter[dim] = (Real)0.5;
+	Real centerShift = (Real)0.5; // halved before every use: a child's center lies a quarter of the parent's width from the parent's center
+	FEMTreeNode* current = _spaceRoot;
+	while( current->children )
+	{
+		const int childIndex = FEMTreeNode::ChildIndex( cellCenter , p );
+		current = current->children + childIndex;
+		centerShift /= 2;
+		for( int dim=0 ; dim<Dim ; dim++ )
+			if( (childIndex>>dim) & 1 ) cellCenter[dim] += centerShift;
+			else                        cellCenter[dim] -= centerShift;
+	}
+	return current;
+}
+template< unsigned int Dim , class Real >
+RegularTreeNode< Dim , FEMTreeNodeData >* FEMTree< Dim , Real >::leaf( Point< Real , Dim > p , LocalDepth maxDepth ) // Descends towards p, creating children as needed, until local depth maxDepth; NULL when p fails _InBounds
+{
+	if( !_InBounds( p ) ) return NULL;
+	Point< Real , Dim > cellCenter;
+	for( int dim=0 ; dim<Dim ; dim++ ) cellCenter[dim] = (Real)0.5;
+	Real centerShift = (Real)0.5;
+	FEMTreeNode* current = _spaceRoot;
+	LocalDepth depth = _localDepth( current );
+	while( depth<0 ? current->children!=NULL : depth<maxDepth ) // negative depths only follow existing children; non-negative depths keep going until maxDepth
+	{
+		if( !current->children ) current->initChildren( nodeAllocator , _NodeInitializer( *this ) );
+		const int childIndex = FEMTreeNode::ChildIndex( cellCenter , p );
+		current = current->children + childIndex;
+		depth++;
+		centerShift /= 2;
+		for( int dim=0 ; dim<Dim ; dim++ )
+			if( (childIndex>>dim) & 1 ) cellCenter[dim] += centerShift;
+			else                        cellCenter[dim] -= centerShift;
+	}
+	return current;
+}
+
+template< unsigned int Dim , class Real > bool FEMTree< Dim , Real >::_InBounds( Point< Real , Dim > p ){ bool inside = true ; for( int dim=0 ; inside && dim<Dim ; dim++ ) inside = !( p[dim]<0 || p[dim]>1 ) ; return inside; } // true when no coordinate of p is <0 or >1
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSignatures >
+bool FEMTree< Dim , Real >::isValidFEMNode( UIntPack< FEMSignatures ... > , const FEMTreeNode* node ) const // false for ghost nodes and negative local depths; otherwise defers to FEMIntegrator::IsValidFEMNode
+{
+	if( GetGhostFlag< Dim >( node ) ) return false;
+	LocalDepth depth ; LocalOffset offset;
+	_localDepthAndOffset( node , depth , offset );
+	return depth>=0 && FEMIntegrator::IsValidFEMNode( UIntPack< FEMSignatures ... >() , depth , offset );
+}
+template< unsigned int Dim , class Real >
+bool FEMTree< Dim , Real >::isValidSpaceNode( const FEMTreeNode* node ) const
+{
+	if( !node ) return false;
+	LocalDepth depth ; LocalOffset offset;
+	_localDepthAndOffset( node , depth , offset );
+	bool inside = depth>=0; // tested first so that 1<<depth below is only evaluated for non-negative depths
+	for( int dim=0 ; inside && dim<Dim ; dim++ ) inside = offset[dim]>=0 && offset[dim]<(1<<depth); // every offset must lie in [0,2^depth)
+	return inside;
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int ... Degrees >
+void FEMTree< Dim , Real >::_setFullDepth( UIntPack< Degrees ... > , FEMTreeNode* node , LocalDepth depth )
+{
+	// Recursively creates children below node until local depth `depth` is reached
+	LocalDepth nodeDepth ; LocalOffset nodeOffset;
+	_localDepthAndOffset( node , nodeDepth , nodeOffset );
+	if( nodeDepth>=depth ) return;
+	// Nodes at non-negative depth that the BOUNDARY_FREE signatures report as out of bounds are not refined
+	if( nodeDepth>=0 && FEMIntegrator::IsOutOfBounds( UIntPack< FEMDegreeAndBType< Degrees , BOUNDARY_FREE >::Signature ... >() , nodeDepth , nodeOffset ) ) return;
+	if( !node->children ) node->initChildren( nodeAllocator , _NodeInitializer( *this ) );
+	for( int c=0 ; c<(1<<Dim) ; c++ ) _setFullDepth( UIntPack< Degrees ... >() , node->children+c , depth );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... Degrees >
+void FEMTree< Dim , Real >::_setFullDepth( UIntPack< Degrees ... > , LocalDepth depth ) // Ensures the root has children, then refines below each of them via the per-node overload
+{
+	if( !_tree->children ) _tree->initChildren( nodeAllocator , _NodeInitializer( *this ) );
+	for( FEMTreeNode *child=_tree->children , *end=child+(1<<Dim) ; child!=end ; child++ ) _setFullDepth( UIntPack< Degrees ... >() , child , depth );
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... Degrees >
+typename FEMTree< Dim , Real >::LocalDepth FEMTree< Dim , Real >::_getFullDepth( UIntPack< Degrees ... > , const FEMTreeNode* node ) const
+{
+	// Smallest local depth of a childless node in the subtree under `node`, ignoring subtrees rooted at
+	// non-negative-depth nodes that the BOUNDARY_FREE signatures report as out of bounds.
+	// INT_MAX means the subtree imposes no limit.
+	LocalDepth nodeDepth ; LocalOffset nodeOffset;
+	_localDepthAndOffset( node , nodeDepth , nodeOffset );
+	// Out-of-bounds nodes place no constraint on the full depth
+	if( nodeDepth>=0 && FEMIntegrator::IsOutOfBounds( UIntPack< FEMDegreeAndBType< Degrees , BOUNDARY_FREE >::Signature ... >() , nodeDepth , nodeOffset ) ) return INT_MAX;
+
+	// A childless node is limited by its own depth
+	if( !node->children ) return nodeDepth;
+
+	// Otherwise take the minimum over the children
+	LocalDepth shallowest = INT_MAX;
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+	{
+		const LocalDepth childDepth = _getFullDepth( UIntPack< Degrees ... >() , node->children+c );
+		if( childDepth<shallowest ) shallowest = childDepth;
+	}
+	return shallowest;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... Degrees >
+typename FEMTree< Dim , Real >::LocalDepth FEMTree< Dim , Real >::getFullDepth( UIntPack< Degrees ... > ) const
+{
+	// A root without children is reported as depth -1
+	if( !_tree->children ) return -1;
+	// Otherwise the result is the smallest value _getFullDepth() returns for any child of the root
+	// (INT_MAX if no child imposes a limit)
+	LocalDepth fullDepth = INT_MAX;
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+		fullDepth = std::min< LocalDepth >( fullDepth , _getFullDepth( UIntPack< Degrees ... >() , _tree->children+c ) );
+	return fullDepth;
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int LeftRadius , unsigned int RightRadius , class ... DenseOrSparseNodeData > 
+void FEMTree< Dim , Real >::thicken( FEMTreeNode** nodes , size_t nodeCount, DenseOrSparseNodeData* ... data ) // Calls getNeighbors< true > (radii LeftRadius/RightRadius per axis) on each of the nodeCount given nodes, then hands an identity index map to _reorderDenseOrSparseNodeData for every data argument
+{
+	std::vector< int > map( _nodeCount ); // identity map over the node indices that exist on entry
+	for( int i=0 ; i<_nodeCount ; i++ ) map[i] = i;
+	{
+		int d=0 , off[Dim];
+		for( int d=0 ; d<Dim ; d++ ) off[d] = 0; // this d is a loop index shadowing the depth declared on the previous line
+		FEMTreeNode::ResetDepthAndOffset( _tree , d , off ); // depth 0 / offset 0 is assigned at the tree root (not the space root) while neighbors are looked up
+	}
+	typename RegularTreeNode< Dim , FEMTreeNodeData >::template NeighborKey< IsotropicUIntPack< Dim , LeftRadius > , IsotropicUIntPack< Dim , RightRadius > > neighborKey;
+	neighborKey.set( _tree->maxDepth() );
+	for( int i=0 ; i<nodeCount ; i++ ) neighborKey.template getNeighbors< true >( nodes[i] , nodeAllocator , _NodeInitializer( *this ) ); // the allocator and initializer are passed along, presumably so that missing neighbors can be created -- confirm against getNeighbors
+	{
+		int d=0 , off[Dim];
+		for( int d=0 ; d<Dim ; d++ ) off[d] = 0;
+		FEMTreeNode::ResetDepthAndOffset( _spaceRoot , d , off ); // put depth 0 / offset 0 back at the space root
+	}
+
+	_reorderDenseOrSparseNodeData( &map[0] , _nodeCount , data ... ); // NOTE(review): map was sized with the _nodeCount seen on entry but the count passed here is read now; if _nodeCount grew in between it exceeds map.size() -- confirm the callee tolerates that. &map[0] also assumes map is non-empty
+}
+template< unsigned int Dim , class Real >
+template< unsigned int LeftRadius , unsigned int RightRadius , class IsThickenNode , class ... DenseOrSparseNodeData >
+void FEMTree< Dim , Real >::thicken( IsThickenNode F , DenseOrSparseNodeData* ... data )
+{
+	std::vector< FEMTreeNode* > nodes; // the active nodes accepted by the predicate F; they are forwarded to the array overload of thicken()
+	for( FEMTreeNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) if( IsActiveNode( node ) && F( node ) ) nodes.push_back( node );
+	thicken< LeftRadius , RightRadius >( nodes.data() , nodes.size() , data ... ); // data() rather than &nodes[0]: indexing an empty vector is undefined, and F may accept no node at all
+}
+
+template< unsigned int Dim , class Real >
+template< unsigned int DensityDegree >
+typename FEMTree< Dim , Real >::template DensityEstimator< DensityDegree >* FEMTree< Dim , Real >::setDensityEstimator( const std::vector< PointSample >& samples , LocalDepth splatDepth , Real samplesPerNode , int coDimension ) // Builds a DensityEstimator from the samples and returns it; the object is allocated with new and is not freed here
+{
+	LocalDepth maxDepth = _spaceRoot->maxDepth();
+	splatDepth = std::max< LocalDepth >( 0 , std::min< LocalDepth >( splatDepth , maxDepth ) ); // clamp the requested depth to [0,maxDepth]
+	DensityEstimator< DensityDegree >* _density = new DensityEstimator< DensityDegree >( splatDepth , coDimension );
+	DensityEstimator< DensityDegree >& density = *_density;
+	PointSupportKey< IsotropicUIntPack< Dim , DensityDegree > > densityKey;
+	densityKey.set( _localToGlobal( splatDepth ) );
+
+	std::vector< int > sampleMap( nodeCount() , -1 ); // node index -> index of a positive-weight sample stored at that node, or -1
+#pragma omp parallel for
+	for( int i=0 ; i<samples.size() ; i++ ) if( samples[i].sample.weight>0 ) sampleMap[ samples[i].node->nodeData.nodeIndex ] = i; // NOTE(review): the unsynchronized writes assume no two samples share a node -- confirm
+	std::function< ProjectiveData< Point< Real , Dim > , Real > ( FEMTreeNode* ) > SetDensity = [&] ( FEMTreeNode* node ) // recursive: returns the accumulated sample of the subtree under node
+	{
+		ProjectiveData< Point< Real , Dim > , Real > sample;
+		LocalDepth d = _localDepth( node );
+		int idx = node->nodeData.nodeIndex;
+		if( node->children )
+			for( int c=0 ; c<(1<<Dim) ; c++ )
+			{
+				ProjectiveData< Point< Real , Dim > , Real > s = SetDensity( node->children + c );
+				if( d<=splatDepth && s.weight>0 ) // d is the depth of node (the parent), so each child's total is added at the parent
+				{
+					Point< Real , Dim > p = s.data / s.weight;
+					Real w = s.weight / samplesPerNode;
+					_addWeightContribution( density , node , p , densityKey , w );
+				}
+				sample += s;
+			}
+		else if( idx<sampleMap.size() && sampleMap[idx]!=-1 ) // idx is converted to unsigned for this comparison, so a negative index fails the test
+		{
+			sample = samples[ sampleMap[ idx ] ].sample;
+			if( d<=splatDepth && sample.weight>0 )
+			{
+				Point< Real , Dim > p = sample.data / sample.weight;
+				Real w = sample.weight / samplesPerNode;
+				_addWeightContribution( density , node , p , densityKey , w );
+			}
+		}
+		return sample;
+	};
+	SetDensity( _spaceRoot );
+
+	MemoryUsage();
+	return _density;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... NormalSigs , unsigned int DensityDegree , class Data >
+SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > > FEMTree< Dim , Real >::setNormalField( UIntPack< NormalSigs ... > , const std::vector< PointSample >& samples , const std::vector< Data >& normalData , const DensityEstimator< DensityDegree >* density , Real& pointWeightSum , std::function< Real ( Real ) > BiasFunction ) // Splats the per-sample normals into a sparse field and writes the ratio of the two accumulated weight sums to pointWeightSum
+{
+	LocalDepth maxDepth = _spaceRoot->maxDepth();
+	typedef PointSupportKey< IsotropicUIntPack< Dim , DensityDegree > > DensityKey;
+	typedef UIntPack< FEMSignature< NormalSigs >::Degree ... > NormalDegrees;
+	typedef PointSupportKey< UIntPack< FEMSignature< NormalSigs >::Degree ... > > NormalKey;
+	std::vector< DensityKey > densityKeys( omp_get_max_threads() ); // one key per thread
+	std::vector<  NormalKey >  normalKeys( omp_get_max_threads() );
+	bool oneKey = DensityDegree==NormalDegrees::Min() && DensityDegree==NormalDegrees::Max(); // all normal degrees equal DensityDegree: the density key is then reused (through a pointer cast) as the normal key
+	for( int i=0 ; i<densityKeys.size() ; i++ ) densityKeys[i].set( _localToGlobal( maxDepth ) );
+	if( !oneKey ) for( int i=0 ; i<normalKeys.size() ; i++ ) normalKeys[i].set( _localToGlobal( maxDepth ) );
+
+	Real weightSum = 0;
+	pointWeightSum = 0;
+	SparseNodeData< Point< Real , Dim > , UIntPack< NormalSigs ... > > normalField;
+	Real _pointWeightSum = 0;
+#pragma omp parallel for reduction( + : weightSum , _pointWeightSum )
+	for( int i=0 ; i<samples.size() ; i++ )
+	{
+		DensityKey& densityKey = densityKeys[ omp_get_thread_num() ];
+		NormalKey& normalKey = normalKeys[ omp_get_thread_num() ];
+		const ProjectiveData< Point< Real , Dim > , Real >& sample = samples[i].sample;
+		if( sample.weight>0 )
+		{
+			Point< Real , Dim > p = sample.data / sample.weight , n = std::get< 0 >( normalData[i].data ).data;
+			Real l = (Real)Length( n );
+			// It is possible that the samples have non-zero normals but there are two co-located samples with negative normals...
+			if( !l ) continue;
+			Real confidence = l / sample.weight;
+			n *= sample.weight / l; // rescale the normal so that its length equals the sample weight
+			Real depthBias = BiasFunction( confidence );
+			weightSum += sample.weight; // accumulated before the bounds check, so out-of-bounds samples still count towards weightSum
+			if( !_InBounds(p) )
+			{
+				WARN( "Point sample is out of bounds" );
+				continue;
+			}
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+			if( density ) _pointWeightSum += _splatPointData< true , DensityDegree , Point< Real , Dim > >( *density , p , n , normalField , densityKey , oneKey ? *( (NormalKey*)&densityKey ) : normalKey , 0 , maxDepth , Dim , depthBias ) * sample.weight;
+#else // !__GNUC__ || __GNUC__ >=5
+			if( density ) _pointWeightSum += _splatPointData< true , DensityDegree , Point< Real , Dim > , NormalSigs ... >( *density , p , n , normalField , densityKey , oneKey ? *( (NormalKey*)&densityKey ) : normalKey , 0 , maxDepth , Dim , depthBias ) * sample.weight;
+#endif // __GNUC__ && __GNUC__ < 5
+			else // pairs with whichever `if( density )` the preprocessor kept above
+			{
+				Real width = (Real)( 1.0 / ( 1<<maxDepth ) );
+#if defined( __GNUC__ ) && __GNUC__ < 5
+#warning "you've got me gcc version<5"
+				_splatPointData< true , Point< Real , Dim > >( leaf( p , maxDepth ) , p , n / (Real)pow( width , Dim ) , normalField , oneKey ? *( (NormalKey*)&densityKey ) : normalKey );
+#else // !__GNUC__ || __GNUC__ >=5
+				_splatPointData< true , Point< Real , Dim > , NormalSigs ... >( leaf( p , maxDepth ) , p , n / (Real)pow( width , Dim ) , normalField , oneKey ? *( (NormalKey*)&densityKey ) : normalKey );
+#endif // __GNUC__ && __GNUC__ < 5
+				_pointWeightSum += sample.weight;
+			}
+		}
+	}
+	pointWeightSum = _pointWeightSum / weightSum; // NOTE(review): weightSum is still 0 when no sample passes the weight/length tests, making this a division by zero
+	MemoryUsage();
+	return normalField;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int DataSig , bool CreateNodes , unsigned int DensityDegree , class Data >
+SparseNodeData< Data , IsotropicUIntPack< Dim , DataSig > > FEMTree< Dim , Real >::setSingleDepthDataField( const std::vector< PointSample >& samples , const std::vector< Data >& sampleData , const DensityEstimator< DensityDegree >* density )
+{
+	LocalDepth finestDepth = _spaceRoot->maxDepth();
+	PointSupportKey< IsotropicUIntPack< Dim , DensityDegree > > densityKey;
+	PointSupportKey< IsotropicUIntPack< Dim , FEMSignature< DataSig >::Degree > > dataKey;
+	densityKey.set( _localToGlobal( finestDepth ) );
+	dataKey.set( _localToGlobal( finestDepth ) );
+	SparseNodeData< Data , IsotropicUIntPack< Dim , DataSig > > dataField; // receives sampleData[s]*weight for every sample whose position is in bounds
+	for( size_t s=0 ; s<samples.size() ; s++ )
+	{
+		const ProjectiveData< Point< Real , Dim > , Real >& sample = samples[s].sample;
+		Point< Real , Dim > position = sample.data; // zero-weight samples keep the stored position as is
+		if( sample.weight!=0 ) position = sample.data / sample.weight;
+		if( !_InBounds( position ) )
+		{
+			WARN( "Point is out of bounds" );
+			continue;
+		}
+		if( density ) _splatPointData< CreateNodes , DensityDegree , DataSig >( *density , position , sampleData[s] * sample.weight , dataField , densityKey , dataKey , 0 , finestDepth , Dim );
+		else _splatPointData< CreateNodes , DataSig >( leaf( position , finestDepth ) , position , sampleData[s] * sample.weight , dataField , dataKey );
+	}
+	MemoryUsage();
+	return dataField;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int DataSig , bool CreateNodes , unsigned int DensityDegree , class Data >
+SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > FEMTree< Dim , Real >::setDataField( const std::vector< PointSample >& samples , std::vector< Data >& sampleData , const DensityEstimator< DensityDegree >* density , bool nearest )
+{
+	LocalDepth finestDepth = _spaceRoot->maxDepth();
+	PointSupportKey< IsotropicUIntPack< Dim , DensityDegree > > densityKey;
+	PointSupportKey< IsotropicUIntPack< Dim , FEMSignature< DataSig >::Degree > > dataKey;
+	densityKey.set( _localToGlobal( finestDepth ) );
+	dataKey.set( _localToGlobal( finestDepth ) );
+	SparseNodeData< ProjectiveData< Data , Real > , IsotropicUIntPack< Dim , DataSig > > dataField; // receives ( data , weight ) pairs for every sample whose position is in bounds
+	for( size_t s=0 ; s<samples.size() ; s++ )
+	{
+		const ProjectiveData< Point< Real , Dim > , Real >& sample = samples[s].sample;
+		const Data& value = sampleData[s];
+		Point< Real , Dim > position = sample.weight!=0 ? sample.data / sample.weight : sample.data;
+		if( !_InBounds( position ) )
+		{
+			WARN( "Point is out of bounds" );
+			continue;
+		}
+		if( !nearest ) _multiSplatPointData< CreateNodes , DensityDegree >( density , (FEMTreeNode*)samples[s].node , position , ProjectiveData< Data , Real >( value , sample.weight ) , dataField , densityKey , dataKey , 2 );
+		else _nearestMultiSplatPointData< DensityDegree >( density , (FEMTreeNode*)samples[s].node , position , ProjectiveData< Data , Real >( value , sample.weight ) , dataField , densityKey , 2 );
+	}
+	MemoryUsage();
+	return dataField;
+}
+template< unsigned int Dim , class Real >
+template< unsigned int MaxDegree , class HasDataFunctor , class ... DenseOrSparseNodeData >
+void FEMTree< Dim , Real >::finalizeForMultigrid( LocalDepth fullDepth , const HasDataFunctor F , DenseOrSparseNodeData* ... data ) // Pads the tree above the space root, completes it down to fullDepth, sets ghost flags, re-sorts the nodes and re-indexes the supplied data
+{
+	_depthOffset = 1;
+	while( _localInset( 0 ) + BSplineEvaluationData< FEMDegreeAndBType< MaxDegree >::Signature >::Begin( 0 )<0 || _localInset( 0 ) + BSplineEvaluationData< FEMDegreeAndBType< MaxDegree >::Signature >::End( 0 )>(1<<_depthOffset) ) // each pass adds one level above the space root; presumably repeated until the depth-0 function range fits -- confirm against _localInset
+	{
+		//                       +-+-+-+-+-+-+-+-+
+		//                       | | | | | | | | |
+		//                       +-+-+-+-+-+-+-+-+
+		//                       | | | | | | | | |
+		//          +-+-+-+-+    +-+-+-+-+-+-+-+-+
+		//          | | | | |    | | | | | | | | |
+		// +-+-+    +-+-+-+-+    +-+-+-+-+-+-+-+-+
+		// |*| |    | | | | |    | | | | | | | | |
+		// +-o-+ -> +-+-o-+-+ -> +-+-+-+-o-+-+-+-+
+		// | | |    | | |*| |    | | | | |*| | | |
+		// +-+-+    +-+-+-+-+    +-+-+-+-+-+-+-+-+
+		//          | | | | |    | | | | | | | | |
+		//          +-+-+-+-+    +-+-+-+-+-+-+-+-+
+		//                       | | | | | | | | |
+		//                       +-+-+-+-+-+-+-+-+
+		//                       | | | | | | | | |
+		//                       +-+-+-+-+-+-+-+-+
+
+		FEMTreeNode* newSpaceRootParent = FEMTreeNode::NewBrood( nodeAllocator , _NodeInitializer( *this ) );
+		FEMTreeNode* oldSpaceRootParent = _spaceRoot->parent;
+		int corner = _depthOffset<=1 ? (1<<Dim)-1 : 0; // the first inserted brood holds the space root's brood under its last child, every later one under child 0
+		newSpaceRootParent[corner].children = _spaceRoot;
+		oldSpaceRootParent->children = newSpaceRootParent;
+		for( int c=0 ; c<(1<<Dim) ; c++ ) _spaceRoot[c].parent = newSpaceRootParent + corner , newSpaceRootParent[c].parent = oldSpaceRootParent; // re-parent the whole space-root brood and hook the new brood under the old parent
+		_depthOffset++;
+	}
+	int d=0 , off[Dim];
+	for( int d=0 ; d<Dim ; d++ ) off[d] = 0; // this d is a loop index shadowing the depth declared on the previous line
+	FEMTreeNode::ResetDepthAndOffset( _tree , d , off );
+	_maxDepth = _spaceRoot->maxDepth();
+	// Make the low-resolution part of the tree be complete
+	fullDepth = std::max< LocalDepth >( 0 , std::min< LocalDepth >( _maxDepth , fullDepth ) ); // clamp to [0,_maxDepth]
+	_setFullDepth( IsotropicUIntPack< Dim , MaxDegree >() , fullDepth );
+	// Clear all the flags and make everything that is not low-res a ghost node
+	for( FEMTreeNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) node->nodeData.flags = 0 , SetGhostFlag< Dim >( node , _localDepth( node )>fullDepth );
+
+	// Set the ghost nodes for the high-res part of the tree
+	_clipTree( F , fullDepth );
+
+	const int OverlapRadius = -BSplineOverlapSizes< MaxDegree , MaxDegree >::OverlapStart;
+	int maxDepth = _tree->maxDepth( ); // not referenced again in this function
+	typedef typename FEMTreeNode::template NeighborKey< IsotropicUIntPack< Dim , OverlapRadius > , IsotropicUIntPack< Dim , OverlapRadius > > NeighborKey;
+
+	std::vector< NeighborKey > neighborKeys( omp_get_max_threads() ); // one key per thread
+	for( int i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( _maxDepth-1 ) );
+
+	for( LocalDepth d=_maxDepth-1 ; d>=0 ; d-- ) // finest to coarsest: every depth-d node with active children gets its depth-d neighbors un-ghosted
+	{
+		std::vector< FEMTreeNode* > nodes;
+		auto NodeTerminationLambda = [&]( const FEMTreeNode *node ){ return _localDepth( node )==d; };
+		for( FEMTreeNode* node=_tree->nextNode( NodeTerminationLambda , NULL ) ; node ; node=_tree->nextNode( NodeTerminationLambda , node ) ) if( _localDepth( node )==d && IsActiveNode< Dim >( node->children ) ) nodes.push_back( node );
+#pragma omp parallel for
+		for( int i=0 ; i<nodes.size() ; i++ )
+		{
+			NeighborKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+			FEMTreeNode* node = nodes[i];
+			neighborKey.template getNeighbors< true >( node , nodeAllocator , _NodeInitializer( *this ) );
+			Pointer( FEMTreeNode* ) nodes = neighborKey.neighbors[ _localToGlobal(d) ].neighbors().data; // shadows the outer vector `nodes`; from here on `nodes` is the neighbor array
+			unsigned int size = neighborKey.neighbors[ _localToGlobal(d) ].neighbors.Size;
+			for( unsigned int i=0 ; i<size ; i++ ) SetGhostFlag< Dim >( nodes[i] , false ); // this i shadows the outer loop index
+		}
+	}
+	std::vector< int > map;
+	_sNodes.set( *_tree , &map ); // map is filled by the call; presumably the index remapping produced by the re-sort -- confirm against SortedTreeNodes::set
+	_setSpaceValidityFlags();
+	for( FEMTreeNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) if( !IsActiveNode< Dim >( node ) ) node->nodeData.nodeIndex = -1; // inactive nodes lose their index
+	_reorderDenseOrSparseNodeData( &map[0] , _sNodes.size() , data ... ); // NOTE(review): &map[0] assumes map is non-empty after set()
+	MemoryUsage();
+}
+
+template< unsigned int Dim , class Real >
+void FEMTree< Dim , Real >::_setSpaceValidityFlags( void ) const // Recomputes SPACE_FLAG on every sorted node: cleared first, set again when isValidSpaceNode() holds
+{
+#pragma omp parallel for
+	for( int n=0 ; n<_sNodes.size() ; n++ )
+	{
+		auto node = _sNodes.treeNodes[n];
+		node->nodeData.flags &= (unsigned char)~( FEMTreeNodeData::SPACE_FLAG );
+		if( isValidSpaceNode( node ) ) node->nodeData.flags |= FEMTreeNodeData::SPACE_FLAG;
+	}
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs1 >
+void FEMTree< Dim , Real >::_setFEM1ValidityFlags( UIntPack< FEMSigs1 ... > ) const
+{
+	// FEM_FLAG_1 is only recomputed when the requested signatures differ from the cached ones
+	const unsigned int requested[] = { FEMSigs1 ... };
+	bool changed;
+#pragma omp critical (set_fem_1_validity_flags)
+	{
+		changed = memcmp( requested , _femSigs1 , sizeof( _femSigs1 ) )!=0;
+		if( changed ) memcpy( _femSigs1 , requested , sizeof( _femSigs1 ) );
+	}
+	if( !changed ) return;
+	for( int n=0 ; n<_sNodes.size() ; n++ )
+	{
+		auto node = _sNodes.treeNodes[n];
+		node->nodeData.flags &= (unsigned char)~( FEMTreeNodeData::FEM_FLAG_1 );
+		if( isValidFEMNode( UIntPack< FEMSigs1 ... >() , node ) ) node->nodeData.flags |= FEMTreeNodeData::FEM_FLAG_1;
+	}
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs2 >
+void FEMTree< Dim , Real >::_setFEM2ValidityFlags( UIntPack< FEMSigs2 ... > ) const // FEM_FLAG_2 is only recomputed when the requested signatures differ from the cached ones
+{
+	const unsigned int requested[] = { FEMSigs2 ... };
+	bool changed;
+#pragma omp critical (set_fem_2_validity_flags)
+	{
+		changed = memcmp( requested , _femSigs2 , sizeof( _femSigs2 ) )!=0;
+		if( changed ) memcpy( _femSigs2 , requested , sizeof( _femSigs2 ) );
+	}
+	if( !changed ) return;
+	for( int n=0 ; n<_sNodes.size() ; n++ )
+	{
+		auto node = _sNodes.treeNodes[n];
+		node->nodeData.flags &= (unsigned char)~( FEMTreeNodeData::FEM_FLAG_2 );
+		if( isValidFEMNode( UIntPack< FEMSigs2 ... >() , node ) ) node->nodeData.flags |= FEMTreeNodeData::FEM_FLAG_2;
+	}
+}
+template< unsigned int Dim , class Real >
+template< unsigned int ... FEMSigs >
+void FEMTree< Dim , Real >::_setRefinabilityFlags( UIntPack< FEMSigs ... > ) const // Recomputes REFINABLE_FLAG for the sorted nodes at depths [0,_maxDepth), but only when the requested signatures differ from the cached ones
+{
+	bool needToReset;
+	unsigned int refinableSigs[] = { FEMSigs ... };
+#pragma omp critical (set_refinability_flags)
+	{
+		needToReset = memcmp( refinableSigs , _refinableSigs , sizeof( _refinableSigs ) )!=0; // compares sizeof( _refinableSigs ) bytes; assumes the pack supplies at least that many -- TODO confirm
+		if( needToReset ) memcpy( _refinableSigs , refinableSigs , sizeof( _refinableSigs ) );
+	}
+	if( needToReset )
+	{
+		typedef typename FEMTreeNode::template ConstNeighborKey< UIntPack< ( - BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart ) ... > , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleEnd ... > > UpSampleKey;
+		typedef typename FEMTreeNode::template ConstNeighbors< UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... > > UpSampleNeighbors;
+		static const int UpSampleStart[] = { BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleStart ... };
+		std::vector< UpSampleKey > neighborKeys( omp_get_max_threads() ); // one key per thread
+		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( _maxDepth ) );
+
+		for( int d=0 ; d<_maxDepth ; d++ )
+#pragma omp parallel for
+			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
+			{
+				UpSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
+
+				// Clear the refinability flag
+				const unsigned char MASK = ~( FEMTreeNodeData::REFINABLE_FLAG );
+				_sNodes.treeNodes[i]->nodeData.flags &= MASK;
+
+				LocalDepth d ; LocalOffset pOff; // this d shadows the depth-loop variable; it is filled in by the call below
+				_localDepthAndOffset( _sNodes.treeNodes[i] , d , pOff );
+
+				// Get the supporting child neighbors
+				neighborKey.getNeighbors( _sNodes.treeNodes[i] );
+				UpSampleNeighbors neighbors;
+				neighborKey.getChildNeighbors( 0 , _localToGlobal( d ) , neighbors );
+
+				// Check if the child neighbors exist (i.e. that the children nodes are not ghost-nodes if they correspond to valid coefficients)
+				bool refinable = true;
+				LocalOffset cOff;
+				WindowLoop< Dim >::Run
+				(
+					IsotropicUIntPack< Dim , 0 >() , UIntPack< BSplineSupportSizes< FEMSignature< FEMSigs >::Degree >::UpSampleSize ... >() ,
+					[&]( int d , int i ){ cOff[d] = pOff[d]*2 + UpSampleStart[d] + i; } , // here d is the axis and i the position in the window (both shadow outer names); computes the child's offset along that axis
+					[&]( const FEMTreeNode* node ){ if( GetGhostFlag< Dim >( node ) && FEMIntegrator::IsValidFEMNode( UIntPack< FEMSigs ... >() , d+1 , cOff ) ) refinable = false; } , // here d is the node depth again; a ghost child at a valid FEM position makes the node non-refinable
+					neighbors.neighbors()
+				);
+				if( refinable ) _sNodes.treeNodes[i]->nodeData.flags |= FEMTreeNodeData::REFINABLE_FLAG;
+			}
+	}
+}
+template< unsigned int Dim , class Real >
+template< class HasDataFunctor >
+void FEMTree< Dim , Real >::_clipTree( const HasDataFunctor& f , LocalDepth fullDepth ) // Below the nodes at local depth fullDepth, a brood of siblings is marked ghost unless f accepts at least one of them
+{
+	std::vector< FEMTreeNode * > subtreeRoots;
+	auto IsAtFullDepth = [&]( const FEMTreeNode *candidate ){ return _localDepth( candidate )==fullDepth; };
+	for( FEMTreeNode* n=_tree->nextNode( IsAtFullDepth , NULL ) ; n ; n=_tree->nextNode( IsAtFullDepth , n ) ) if( IsAtFullDepth( n ) ) subtreeRoots.push_back( n );
+#pragma omp parallel for
+	for( int i=0 ; i<subtreeRoots.size() ; i++ )
+		for( FEMTreeNode* node=subtreeRoots[i]->nextNode() ; node ; node=subtreeRoots[i]->nextNode(node) ) if( node->children )
+		{
+			bool anyChildHasData = false;
+			for( int c=0 ; c<(1<<Dim) && !anyChildHasData ; c++ ) if( f( node->children + c ) ) anyChildHasData = true;
+			for( int c=0 ; c<(1<<Dim) ; c++ ) SetGhostFlag< Dim >( node->children + c , !anyChildHasData );
+		}
+}
+
+template< unsigned int Dim , class Real >
+template< typename T , typename Data , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+void FEMTree< Dim , Real >::_ExactPointAndDataInterpolationInfo< T , Data , PointD , ConstraintDual , SystemDual >::_init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , bool noRescale ) // Groups the samples by their closest valid-space ancestor and stores, per node, the [first,second) range of its samples in _iData
+{
+	_sampleSpan.resize( tree.nodesSize() );
+#pragma omp parallel for
+	for( int i=0 ; i<tree.nodesSize() ; i++ ) _sampleSpan[i] = std::pair< int , int >( 0 , 0 );
+	for( int i=0 ; i<samples.size() ; i++ ) // pass 1: count the samples per node in span.second
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent; // climb to the closest ancestor that is a valid space node
+		if( leaf && tree._isValidSpaceNode( leaf ) ) _sampleSpan[ leaf->nodeData.nodeIndex ].second++;
+	}
+	_iData.resize( samples.size() ); // NOTE(review): sized for all samples although samples without a valid-space ancestor are skipped below, which would leave trailing entries unassigned -- confirm
+
+	std::function< void ( FEMTreeNode* , int& ) > SetRange = [&] ( FEMTreeNode* node , int& start ) // pass 2 (recursive): turn the counts into consecutive index ranges
+	{
+		std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ];
+		if( tree._isValidSpaceNode( node->children ) )
+		{
+			for( int c=0 ; c<(1<<Dim) ; c++ ) SetRange( node->children + c , start );
+			span.first  = _sampleSpan[ node->children[0           ].nodeData.nodeIndex ].first; // an interior node spans from its first child's start to its last child's end
+			span.second = _sampleSpan[ node->children[ (1<<Dim)-1 ].nodeData.nodeIndex ].second;
+		}
+		else
+		{
+			span.second = start + span.second - span.first; // count -> [start,start+count)
+			span.first = start;
+			start += span.second - span.first;
+		}
+	};
+
+	int start = 0;
+	SetRange( tree._spaceRoot , start );
+	for( FEMTreeNode* node=tree._spaceRoot->nextNode() ; node ; node=tree._spaceRoot->nextNode(node) )
+		if( tree._isValidSpaceNode( node ) && !tree._isValidSpaceNode( node->children ) ) _sampleSpan[ node->nodeData.nodeIndex ].second = _sampleSpan[ node->nodeData.nodeIndex ].first; // rewind second so it can serve as the insertion cursor in pass 3
+
+	for( int i=0 ; i<samples.size() ; i++ ) // pass 3: place every sample at its node's cursor; the second++ brings each range end back to its final value
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent;
+		if( leaf && tree._isValidSpaceNode( leaf ) )
+		{
+			const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+			DualPointAndDataInfo< Dim , Real , Data , T , PointD >& _pData = _iData[ _sampleSpan[ leaf->nodeData.nodeIndex ].second++ ];
+			_pData.pointInfo.position = pData.data;
+			_pData.pointInfo.weight = pData.weight;
+			_pData.pointInfo.dualValues = _constraintDual( pData.data/pData.weight , sampleData[i]/pData.weight ) * pData.weight; // NOTE(review): divides by the sample weight without checking that it is non-zero
+			_pData.data = sampleData[i];
+		}
+	}
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)_iData.size() ; i++ )
+	{
+		Real w = _iData[i].pointInfo.weight;
+		_iData[i] /= w; // normalize the stored entry by its weight ...
+		if( noRescale ) _iData[i].pointInfo.weight = w; // ... then store the weight either unchanged
+		else            _iData[i].pointInfo.weight = w * ( 1<<tree._maxDepth ); // ... or multiplied by 2^_maxDepth
+		_iData[i].pointInfo.dualValues *= _iData[i].pointInfo.weight;
+	}
+}
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int PointD , typename ConstraintDual , typename SystemDual >
+void FEMTree< Dim , Real >::ExactPointInterpolationInfo< T , PointD , ConstraintDual , SystemDual >::_init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , bool noRescale )
+{ // Groups the samples by the valid space node they resolve to: _iData receives one record per kept sample and _sampleSpan[nodeIndex] the half-open range of _iData belonging to that node.
+	_sampleSpan.resize( tree.nodesSize() );
+#pragma omp parallel for
+	for( int i=0 ; i<tree.nodesSize() ; i++ ) _sampleSpan[i] = std::pair< int , int >( 0 , 0 );
+	for( int i=0 ; i<samples.size() ; i++ ) // Pass 1: count the samples per node (with .first still 0, .second holds the count)
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent; // climb to the nearest valid space node; a sample with none is not counted
+		if( leaf && tree._isValidSpaceNode( leaf ) ) _sampleSpan[ leaf->nodeData.nodeIndex ].second++;
+	}
+	_iData.resize( samples.size() ); // sized for all samples, including any that were not counted above
+
+	std::function< void ( FEMTreeNode* , int& ) > SetRange = [&] ( FEMTreeNode* node , int& start ) // Pass 2: depth-first conversion of the counts into [first,second) offsets
+	{
+		std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ];
+		if( tree._isValidSpaceNode( node->children ) )
+		{
+			for( int c=0 ; c<(1<<Dim) ; c++ ) SetRange( node->children + c , start );
+			span.first  = _sampleSpan[ node->children[0           ].nodeData.nodeIndex ].first; // a node with valid children spans from its first child's start ...
+			span.second = _sampleSpan[ node->children[ (1<<Dim)-1 ].nodeData.nodeIndex ].second; // ... to its last child's end
+		}
+		else
+		{
+			span.second = start + span.second - span.first; // order matters: derive the end from the count before .first is overwritten
+			span.first = start;
+			start += span.second - span.first; // advance the running offset by this node's sample count
+		}
+	};
+
+	int start = 0;
+	SetRange( tree._spaceRoot , start );
+	for( FEMTreeNode* node=tree._spaceRoot->nextNode() ; node ; node=tree._spaceRoot->nextNode(node) ) // rewind .second to .first on valid nodes without valid children so it can act as the write cursor below
+		if( tree._isValidSpaceNode( node ) && !tree._isValidSpaceNode( node->children ) ) _sampleSpan[ node->nodeData.nodeIndex ].second = _sampleSpan[ node->nodeData.nodeIndex ].first;
+
+	for( int i=0 ; i<samples.size() ; i++ ) // Pass 3: scatter each sample into its node's range, advancing .second as records are written
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent; // same resolution as in pass 1
+		if( leaf && tree._isValidSpaceNode( leaf ) )
+		{
+			const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+			DualPointInfo< Dim , Real , T , PointD >& _pData = _iData[ _sampleSpan[ leaf->nodeData.nodeIndex ].second++ ];
+			_pData.position = pData.data;
+			_pData.dualValues = _constraintDual( pData.data/pData.weight ) * pData.weight; // dual evaluated at the weight-normalized position, stored scaled by the weight
+			_pData.weight = pData.weight;
+		}
+	}
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)_iData.size() ; i++ ) // Final pass: divide each record by its weight, then reinstate the (optionally rescaled) weight
+	{
+		Real w = _iData[i].weight;
+		_iData[i] /= w;
+		if( noRescale ) _iData[i].weight = w;
+		else            _iData[i].weight = w * ( 1<<tree._maxDepth ); // weight multiplied by 2^_maxDepth unless noRescale is set
+		_iData[i].dualValues *= _iData[i].weight;
+	}
+}
+template< unsigned int Dim , class Real >
+template< unsigned int PointD , typename ConstraintDual , typename SystemDual >
+void FEMTree< Dim , Real >::ExactPointInterpolationInfo< double , PointD , ConstraintDual , SystemDual >::_init( const class FEMTree< Dim , Real >& tree , const std::vector< PointSample >& samples , bool noRescale )
+{ // Specialization for double: groups the samples by the valid space node they resolve to, filling _iData (one record per kept sample) and _sampleSpan (each node's half-open range of _iData).
+	_sampleSpan.resize( tree.nodesSize() );
+#pragma omp parallel for
+	for( int i=0 ; i<tree.nodesSize() ; i++ ) _sampleSpan[i] = std::pair< int , int >( 0 , 0 );
+	for( int i=0 ; i<samples.size() ; i++ ) // Pass 1: count the samples per node (with .first still 0, .second holds the count)
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent; // climb to the nearest valid space node; a sample with none is not counted
+		if( leaf && tree._isValidSpaceNode( leaf ) ) _sampleSpan[ leaf->nodeData.nodeIndex ].second++;
+	}
+	_iData.resize( samples.size() ); // sized for all samples, including any that were not counted above
+
+	std::function< void ( FEMTreeNode* , int& ) > SetRange = [&] ( FEMTreeNode* node , int& start ) // Pass 2: depth-first conversion of the counts into [first,second) offsets
+	{
+		std::pair< int , int >& span = _sampleSpan[ node->nodeData.nodeIndex ];
+		if( tree._isValidSpaceNode( node->children ) )
+		{
+			for( int c=0 ; c<(1<<Dim) ; c++ ) SetRange( node->children + c , start );
+			span.first  = _sampleSpan[ node->children[0           ].nodeData.nodeIndex ].first; // a node with valid children spans from its first child's start ...
+			span.second = _sampleSpan[ node->children[ (1<<Dim)-1 ].nodeData.nodeIndex ].second; // ... to its last child's end
+		}
+		else
+		{
+			span.second = start + span.second - span.first; // order matters: derive the end from the count before .first is overwritten
+			span.first = start;
+			start += span.second - span.first; // advance the running offset by this node's sample count
+		}
+	};
+
+	int start = 0;
+	SetRange( tree._spaceRoot , start );
+	for( FEMTreeNode* node=tree._spaceRoot->nextNode() ; node ; node=tree._spaceRoot->nextNode(node) ) // rewind .second to .first on valid nodes without valid children so it can act as the write cursor below
+		if( tree._isValidSpaceNode( node ) && !tree._isValidSpaceNode( node->children ) ) _sampleSpan[ node->nodeData.nodeIndex ].second = _sampleSpan[ node->nodeData.nodeIndex ].first;
+
+	for( int i=0 ; i<samples.size() ; i++ ) // Pass 3: scatter each sample into its node's range, advancing .second as records are written
+	{
+		const FEMTreeNode* leaf = samples[i].node;
+		while( leaf && !tree._isValidSpaceNode( leaf ) ) leaf = leaf->parent; // same resolution as in pass 1
+		if( leaf && tree._isValidSpaceNode( leaf ) )
+		{
+			const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+			DualPointInfo< Dim , Real , T , PointD >& _pData = _iData[ _sampleSpan[ leaf->nodeData.nodeIndex ].second++ ]; // NOTE(review): T is not a template parameter of this specialization; presumably the class supplies it (e.g. a typedef for double) -- confirm
+			_pData.position = pData.data;
+			_pData.dualValues = _constraintDual( pData.data/pData.weight ) * pData.weight; // dual evaluated at the weight-normalized position, stored scaled by the weight
+			_pData.weight = pData.weight;
+		}
+	}
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)_iData.size() ; i++ ) // Final pass: divide each record by its weight, then reinstate the (optionally rescaled) weight
+	{
+		Real w = _iData[i].weight;
+		_iData[i] /= w;
+		if( noRescale ) _iData[i].weight = w;
+		else            _iData[i].weight = w * ( 1<<tree._maxDepth ); // weight multiplied by 2^_maxDepth unless noRescale is set
+		_iData[i].dualValues *= _iData[i].weight;
+	}
+}
+template< unsigned int Dim , class Real >
+template< typename T >
+bool FEMTree< Dim , Real >::_setInterpolationInfoFromChildren( FEMTreeNode* node , SparseNodeData< T , IsotropicUIntPack< Dim , FEMTrivialSignature > >& interpolationInfo ) const
+{
+	// Without active children the node simply reports whether it carries data of its own.
+	if( !IsActiveNode< Dim >( node->children ) ) return interpolationInfo( node )!=NULL;
+	// Otherwise visit every child (no short-circuit) and sum the data of those that have some.
+	bool anyChildData = false;
+	T childSum = {};
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+	{
+		FEMTreeNode* child = node->children + c;
+		if( !_setInterpolationInfoFromChildren( child , interpolationInfo ) ) continue;
+		childSum += interpolationInfo[ child ];
+		anyChildData = true;
+	}
+	if( anyChildData && IsActiveNode< Dim >( node ) ) interpolationInfo[ node ] += childSum; // only active nodes absorb the sum
+	return anyChildData;
+}
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int PointD , typename ConstraintDual >
+SparseNodeData< DualPointInfo< Dim , Real , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > FEMTree< Dim , Real >::_densifyInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstraintDual constraintDual , int adaptiveExponent ) const
+{ // Accumulates the weighted samples per active node, pushes the sums up to active ancestors, normalizes by weight and rescales each node's weight by a depth-dependent power of two.
+	SparseNodeData< DualPointInfo< Dim , Real , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > iInfo;
+	for( int i=0 ; i<samples.size() ; i++ )
+	{
+		const FEMTreeNode* node = samples[i].node;
+		const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+		while( node && !IsActiveNode< Dim >( node ) ) node = node->parent; // stop at the top of the tree: a null node has no parent to follow
+		if( node && pData.weight ) // samples with zero weight, or with no active node on their ancestor chain, are skipped
+		{
+			DualPointInfo< Dim , Real , T , PointD >& _pData = iInfo[node];
+			_pData.position += pData.data;
+			_pData.weight += pData.weight;
+			_pData.dualValues += constraintDual( pData.data/pData.weight ) * pData.weight; // dual evaluated at the weight-normalized position, accumulated scaled by the weight
+		}
+	}
+
+	// Set the interior values
+	_setInterpolationInfoFromChildren( _spaceRoot , iInfo );
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)iInfo.size() ; i++ )
+	{
+		Real w = iInfo[i].weight;
+		iInfo[i] /= w ; iInfo[i].weight = w; // divide the whole record by its weight, then put the weight back
+	}
+	LocalDepth maxDepth = _spaceRoot->maxDepth();
+
+	// Set the average position and scale the weights
+	for( const FEMTreeNode* node=_tree->nextNode() ; node ; node=_tree->nextNode(node) ) if( IsActiveNode< Dim >( node ) )
+	{
+		DualPointInfo< Dim , Real , T , PointD >* pData = iInfo( node );
+		if( pData )
+		{
+			int e = _localDepth( node ) * adaptiveExponent - ( maxDepth ) * (adaptiveExponent-1); // the weight is scaled by 2^e
+			if( e<0 ) pData->weight /= Real( 1<<(-e) );
+			else      pData->weight *= Real( 1<<  e  );
+			pData->dualValues *= pData->weight;
+		}
+	}
+	return iInfo;
+}
+template< unsigned int Dim , class Real >
+template< typename T , typename Data , unsigned int PointD , typename ConstraintDual >
+SparseNodeData< DualPointAndDataInfo< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > FEMTree< Dim , Real >::_densifyInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , int adaptiveExponent ) const
+{ // Variant carrying per-sample data: accumulates the weighted samples and their data per active node, pushes the sums up to active ancestors, normalizes by weight and rescales each node's weight by a depth-dependent power of two.
+	SparseNodeData< DualPointAndDataInfo< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > iInfo;
+	for( int i=0 ; i<samples.size() ; i++ )
+	{
+		const FEMTreeNode* node = samples[i].node;
+		const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+		while( node && !IsActiveNode< Dim >( node ) ) node = node->parent; // stop at the top of the tree: a null node has no parent to follow
+		if( node && pData.weight ) // samples with zero weight, or with no active node on their ancestor chain, are skipped
+		{
+			DualPointAndDataInfo< Dim , Real , Data , T , PointD >& _pData = iInfo[node];
+			_pData.pointInfo.position += pData.data;
+			_pData.pointInfo.dualValues += constraintDual( pData.data/pData.weight , sampleData[i]/pData.weight ) * pData.weight; // dual evaluated at the weight-normalized position and data, accumulated scaled by the weight
+			_pData.pointInfo.weight += pData.weight;
+			_pData.data += sampleData[i];
+		}
+	}
+
+	// Set the interior values
+	_setInterpolationInfoFromChildren( _spaceRoot , iInfo );
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)iInfo.size() ; i++ )
+	{
+		Real w = iInfo[i].pointInfo.weight;
+		iInfo[i] /= w ; iInfo[i].pointInfo.weight = w; // divide the whole record by its weight, then put the weight back
+	}
+	LocalDepth maxDepth = _spaceRoot->maxDepth();
+
+	// Set the average position and scale the weights
+	for( const FEMTreeNode* node=_tree->nextNode() ; node ; node=_tree->nextNode(node) ) if( IsActiveNode< Dim >( node ) )
+	{
+		DualPointAndDataInfo< Dim , Real , Data , T , PointD >* pData = iInfo( node );
+		if( pData )
+		{
+			int e = _localDepth( node ) * adaptiveExponent - ( maxDepth ) * (adaptiveExponent-1); // the weight is scaled by 2^e
+			if( e<0 ) pData->pointInfo.weight /= Real( 1<<(-e) );
+			else      pData->pointInfo.weight *= Real( 1<<  e  );
+			pData->pointInfo.dualValues *= pData->pointInfo.weight;
+		}
+	}
+	return iInfo;
+}
+template< unsigned int Dim , class Real >
+template< typename T , unsigned int PointD , typename ConstraintDual >
+SparseNodeData< DualPointInfoBrood< Dim , Real , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > FEMTree< Dim , Real >::_densifyChildInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstraintDual constraintDual , bool noRescale ) const
+{ // Accumulates the weighted samples per active node, split by the child slot (_childIndex) the sample position falls in, then normalizes positions and optionally rescales weights and duals by 2^_maxDepth.
+	SparseNodeData< DualPointInfoBrood< Dim , Real , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > iInfo;
+	for( int i=0 ; i<samples.size() ; i++ )
+	{
+		const FEMTreeNode* node = samples[i].node;
+		const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+		while( node && !IsActiveNode< Dim >( node ) ) node = node->parent; // stop at the top of the tree: a null node has no parent to follow
+		if( node && pData.weight ) // samples with zero weight, or with no active node on their ancestor chain, are skipped
+		{
+			DualPointInfoBrood< Dim , Real , T , PointD >& _pData = iInfo[node];
+			Point< Real , Dim > p = pData.data/pData.weight; // weight-normalized sample position
+			int cIdx = _childIndex( node , p );
+			_pData[cIdx].position += pData.data;
+			_pData[cIdx].weight += pData.weight;
+			_pData[cIdx].dualValues += constraintDual( p ) * pData.weight;
+		}
+	}
+
+	// Set the interior values
+	_setInterpolationInfoFromChildren( _spaceRoot , iInfo );
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)iInfo.size() ; i++ )
+	{
+		iInfo[i].finalize();
+		for( int c=0 ; c<(int)iInfo[i].size() ; c++ )
+		{
+			iInfo[i][c].position /= iInfo[i][c].weight; // accumulated position -> weighted average
+			if( !noRescale )
+			{
+				iInfo[i][c].weight     *= ( 1<<_maxDepth );
+				iInfo[i][c].dualValues *= ( 1<<_maxDepth );
+			}
+		}
+	}
+	return iInfo;
+}
+template< unsigned int Dim , class Real >
+template< typename T , typename Data , unsigned int PointD , typename ConstraintDual >
+SparseNodeData< DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > FEMTree< Dim , Real >::_densifyChildInterpolationInfoAndSetDualConstraints( const std::vector< PointSample >& samples , ConstPointer( Data ) sampleData , ConstraintDual constraintDual , bool noRescale ) const
+{ // Variant carrying per-sample data: accumulates the weighted samples and data per active node, split by the child slot (_childIndex) of the sample position, then normalizes and optionally rescales by 2^_maxDepth.
+	SparseNodeData< DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD > , IsotropicUIntPack< Dim , FEMTrivialSignature > > iInfo;
+	for( int i=0 ; i<samples.size() ; i++ )
+	{
+		const FEMTreeNode* node = samples[i].node;
+		const ProjectiveData< Point< Real , Dim > , Real >& pData = samples[i].sample;
+		while( node && !IsActiveNode< Dim >( node ) ) node = node->parent; // stop at the top of the tree: a null node has no parent to follow
+		if( node && pData.weight ) // samples with zero weight, or with no active node on their ancestor chain, are skipped
+		{
+			DualPointAndDataInfoBrood< Dim , Real , Data , T , PointD >& _pData = iInfo[node];
+			Point< Real , Dim > p = pData.data/pData.weight; // weight-normalized sample position
+			int cIdx = _childIndex( node , p );
+			_pData[cIdx].pointInfo.position += pData.data;
+			_pData[cIdx].pointInfo.dualValues += constraintDual( p , sampleData[i]/pData.weight ) * pData.weight;
+			_pData[cIdx].pointInfo.weight += pData.weight;
+			_pData[cIdx].data += sampleData[i];
+		}
+	}
+
+	// Set the interior values
+	_setInterpolationInfoFromChildren( _spaceRoot , iInfo );
+
+#pragma omp parallel for
+	for( int i=0 ; i<(int)iInfo.size() ; i++ )
+	{
+		iInfo[i].finalize();
+		for( int c=0 ; c<(int)iInfo[i].size() ; c++ )
+		{
+			iInfo[i][c].pointInfo.position /= iInfo[i][c].pointInfo.weight; // accumulated position -> weighted average
+			iInfo[i][c].data /= iInfo[i][c].pointInfo.weight; // accumulated data -> weighted average
+			if( !noRescale )
+			{
+				iInfo[i][c].pointInfo.weight     *= ( 1<<_maxDepth );
+				iInfo[i][c].pointInfo.dualValues *= ( 1<<_maxDepth );
+				iInfo[i][c].data                 *= ( 1<<_maxDepth );
+			}
+		}
+	}
+	return iInfo;
+}
+
+
+
+template< unsigned int Dim , class Real >
+std::vector< int > FEMTree< Dim , Real >::merge( FEMTree* tree )
+{
+	std::vector< int > indexMap;
+	if( _depthOffset!=tree->_depthOffset ) ERROR_OUT( "depthOffsets don't match: %d != %d" , _depthOffset , tree->_depthOffset );
+
+	// First node index not yet used by this tree
+	int freeIndex = 0;
+	for( const FEMTreeNode* n=_tree->nextNode() ; n!=NULL ; n=_tree->nextNode( n ) ) freeIndex = std::max< int >( freeIndex , n->nodeData.nodeIndex+1 );
+
+	// One map entry for every node index the incoming tree may use
+	{
+		int incomingIndexCount = 0;
+		for( const FEMTreeNode* n=tree->_tree->nextNode() ; n!=NULL ; n=tree->_tree->nextNode( n ) ) incomingIndexCount = std::max< int >( incomingIndexCount , n->nodeData.nodeIndex+1 );
+		indexMap.resize( incomingIndexCount );
+	}
+
+	std::function< void ( FEMTreeNode* , FEMTreeNode* , std::vector< int >& , int& ) > MergeNodes = [&]( FEMTreeNode* dst , FEMTreeNode* src , std::vector< int >& remap , int& next )
+	{
+		const int childCount = 1<<Dim;
+		if( !src ) return;
+		if( !dst )
+		{
+			// No counterpart in this tree: every indexed node of src's subtree gets a fresh index
+			if( src->nodeData.nodeIndex>=0 ){ remap[ src->nodeData.nodeIndex ] = next ; src->nodeData.nodeIndex = next++; }
+			if( src->children ) for( int c=0 ; c<childCount ; c++ ) MergeNodes( NULL , src->children+c , remap , next );
+			return;
+		}
+		if( src->nodeData.nodeIndex>=0 )
+		{
+			if( dst->nodeData.nodeIndex<0 ) dst->nodeData.nodeIndex = next++;
+			remap[ src->nodeData.nodeIndex ] = dst->nodeData.nodeIndex;
+		}
+		if( !src->children ) return;
+		if( dst->children ){ for( int c=0 ; c<childCount ; c++ ) MergeNodes( dst->children+c , src->children+c , remap , next ) ; return; }
+		// dst has no children here: re-index src's descendants, then hand the whole brood over to dst
+		for( int c=0 ; c<childCount ; c++ ) MergeNodes( NULL , src->children+c , remap , next );
+		dst->children = src->children;
+		src->children = NULL;
+		for( int c=0 ; c<childCount ; c++ ) dst->children[c].parent = dst;
+	};
+
+	MergeNodes( _tree , tree->_tree , indexMap , freeIndex );
+	return indexMap;
+}
+
diff --git a/Src/Factor.cpp b/Src/Factor.cpp
deleted file mode 100644
index 145c100..0000000
--- a/Src/Factor.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-//////////////////////
-// Polynomial Roots //
-//////////////////////
-#include <math.h>
-#include "Factor.h"
-int Factor(double a1,double a0,double roots[1][2],double EPS){
-	if(fabs(a1)<=EPS){return 0;}
-	roots[0][0]=-a0/a1;
-	roots[0][1]=0;
-	return 1;
-}
-int Factor(double a2,double a1,double a0,double roots[2][2],double EPS){
-	double d;
-	if(fabs(a2)<=EPS){return Factor(a1,a0,roots,EPS);}
-
-	d=a1*a1-4*a0*a2;
-	a1/=(2*a2);
-	if(d<0){
-		d=sqrt(-d)/(2*a2);
-		roots[0][0]=roots[1][0]=-a1;
-		roots[0][1]=-d;
-		roots[1][1]= d;
-	}
-	else{
-		d=sqrt(d)/(2*a2);
-		roots[0][1]=roots[1][1]=0;
-		roots[0][0]=-a1-d;
-		roots[1][0]=-a1+d;
-	}
-	return 2;
-}
-// Solution taken from: http://mathworld.wolfram.com/CubicFormula.html
-// and http://www.csit.fsu.edu/~burkardt/f_src/subpak/subpak.f90
-int Factor(double a3,double a2,double a1,double a0,double roots[3][2],double EPS){
-	double q,r,r2,q3;
-
-	if(fabs(a3)<=EPS){return Factor(a2,a1,a0,roots,EPS);}
-	a2/=a3;
-	a1/=a3;
-	a0/=a3;
-
-	q=-(3*a1-a2*a2)/9;
-	r=-(9*a2*a1-27*a0-2*a2*a2*a2)/54;
-	r2=r*r;
-	q3=q*q*q;
-
-	if(r2<q3){
-		double sqrQ=sqrt(q);
-		double theta = acos ( r / (sqrQ*q) );
-		double cTheta=cos(theta/3)*sqrQ;
-		double sTheta=sin(theta/3)*sqrQ*SQRT_3/2;
-		roots[0][1]=roots[1][1]=roots[2][1]=0;
-		roots[0][0]=-2*cTheta;
-		roots[1][0]=-2*(-cTheta*0.5-sTheta);
-		roots[2][0]=-2*(-cTheta*0.5+sTheta);
-	}
-	else{
-		double s1,s2,sqr=sqrt(r2-q3);
-		double t;
-		t=-r+sqr;
-		if(t<0){s1=-pow(-t,1.0/3);}
-		else{s1=pow(t,1.0/3);}
-		t=-r-sqr;
-		if(t<0){s2=-pow(-t,1.0/3);}
-		else{s2=pow(t,1.0/3);}
-		roots[0][1]=0;
-		roots[0][0]=s1+s2;
-		s1/=2;
-		s2/=2;
-		roots[1][0]= roots[2][0]=-s1-s2;
-		roots[1][1]= SQRT_3*(s1-s2);
-		roots[2][1]=-roots[1][1];
-	}
-	roots[0][0]-=a2/3;
-	roots[1][0]-=a2/3;
-	roots[2][0]-=a2/3;
-	return 3;
-}
-double ArcTan2(double y,double x){
-	/* This first case should never happen */
-	if(y==0 && x==0){return 0;}
-	if(x==0){
-		if(y>0){return PI/2.0;}
-		else{return -PI/2.0;}
-	}
-	if(x>=0){return atan(y/x);}
-	else{
-		if(y>=0){return atan(y/x)+PI;}
-		else{return atan(y/x)-PI;}
-	}
-}
-double Angle(const double in[2]){
-	if((in[0]*in[0]+in[1]*in[1])==0.0){return 0;}
-	else{return ArcTan2(in[1],in[0]);}
-}
-void Sqrt(const double in[2],double out[2]){
-	double r=sqrt(sqrt(in[0]*in[0]+in[1]*in[1]));
-	double a=Angle(in)*0.5;
-	out[0]=r*cos(a);
-	out[1]=r*sin(a);
-}
-void Add(const double in1[2],const double in2[2],double out[2]){
-	out[0]=in1[0]+in2[0];
-	out[1]=in1[1]+in2[1];
-}
-void Subtract(const double in1[2],const double in2[2],double out[2]){
-	out[0]=in1[0]-in2[0];
-	out[1]=in1[1]-in2[1];
-}
-void Multiply(const double in1[2],const double in2[2],double out[2]){
-	out[0]=in1[0]*in2[0]-in1[1]*in2[1];
-	out[1]=in1[0]*in2[1]+in1[1]*in2[0];
-}
-void Divide(const double in1[2],const double in2[2],double out[2]){
-	double temp[2];
-	double l=in2[0]*in2[0]+in2[1]*in2[1];
-	temp[0]= in2[0]/l;
-	temp[1]=-in2[1]/l;
-	Multiply(in1,temp,out);
-}
-// Solution taken from: http://mathworld.wolfram.com/QuarticEquation.html
-// and http://www.csit.fsu.edu/~burkardt/f_src/subpak/subpak.f90
-int Factor(double a4,double a3,double a2,double a1,double a0,double roots[4][2],double EPS){
-	double R[2],D[2],E[2],R2[2];
-
-	if(fabs(a4)<EPS){return Factor(a3,a2,a1,a0,roots,EPS);}
-	a3/=a4;
-	a2/=a4;
-	a1/=a4;
-	a0/=a4;
-
-	Factor(1.0,-a2,a3*a1-4.0*a0,-a3*a3*a0+4.0*a2*a0-a1*a1,roots,EPS);
-
-	R2[0]=a3*a3/4.0-a2+roots[0][0];
-	R2[1]=0;
-	Sqrt(R2,R);
-	if(fabs(R[0])>10e-8){
-		double temp1[2],temp2[2];
-		double p1[2],p2[2];
-
-		p1[0]=a3*a3*0.75-2.0*a2-R2[0];
-		p1[1]=0;
-
-		temp2[0]=((4.0*a3*a2-8.0*a1-a3*a3*a3)/4.0);
-		temp2[1]=0;
-		Divide(temp2,R,p2);
-
-		Add     (p1,p2,temp1);
-		Subtract(p1,p2,temp2);
-
-		Sqrt(temp1,D);
-		Sqrt(temp2,E);
-	}
-	else{
-		R[0]=R[1]=0;
-		double temp1[2],temp2[2];
-		temp1[0]=roots[0][0]*roots[0][0]-4.0*a0;
-		temp1[1]=0;
-		Sqrt(temp1,temp2);
-		temp1[0]=a3*a3*0.75-2.0*a2+2.0*temp2[0];
-		temp1[1]=                  2.0*temp2[1];
-		Sqrt(temp1,D);
-		temp1[0]=a3*a3*0.75-2.0*a2-2.0*temp2[0];
-		temp1[1]=                 -2.0*temp2[1];
-		Sqrt(temp1,E);
-	}
-
-	roots[0][0]=-a3/4.0+R[0]/2.0+D[0]/2.0;
-	roots[0][1]=        R[1]/2.0+D[1]/2.0;
-
-	roots[1][0]=-a3/4.0+R[0]/2.0-D[0]/2.0;
-	roots[1][1]=        R[1]/2.0-D[1]/2.0;
-
-	roots[2][0]=-a3/4.0-R[0]/2.0+E[0]/2.0;
-	roots[2][1]=       -R[1]/2.0+E[1]/2.0;
-
-	roots[3][0]=-a3/4.0-R[0]/2.0-E[0]/2.0;
-	roots[3][1]=       -R[1]/2.0-E[1]/2.0;
-	return 4;
-}
-
-int Solve(const double* eqns,const double* values,double* solutions,int dim){
-	int i,j,eIndex;
-	double v,m;
-	int *index=new int[dim];
-	int *set=new int[dim];
-	double* myEqns=new double[dim*dim];
-	double* myValues=new double[dim];
-
-	for(i=0;i<dim*dim;i++){myEqns[i]=eqns[i];}
-	for(i=0;i<dim;i++){
-		myValues[i]=values[i];
-		set[i]=0;
-	}
-	for(i=0;i<dim;i++){
-		// Find the largest equation that has a non-zero entry in the i-th index
-		m=-1;
-		eIndex=-1;
-		for(j=0;j<dim;j++){
-			if(set[j]){continue;}
-			if(myEqns[j*dim+i]!=0 && fabs(myEqns[j*dim+i])>m){
-				m=fabs(myEqns[j*dim+i]);
-				eIndex=j;
-			}
-		}
-		if(eIndex==-1){
-			delete[] index;
-			delete[] myValues;
-			delete[] myEqns;
-			delete[] set;
-			return 0;
-		}
-		// The position in which the solution for the i-th variable can be found
-		index[i]=eIndex;
-		set[eIndex]=1;
-
-		// Normalize the equation
-		v=myEqns[eIndex*dim+i];
-		for(j=0;j<dim;j++){myEqns[eIndex*dim+j]/=v;}
-		myValues[eIndex]/=v;
-
-		// Subtract it off from everything else
-		for(j=0;j<dim;j++){
-			if(j==eIndex){continue;}
-			double vv=myEqns[j*dim+i];
-			for(int k=0;k<dim;k++){myEqns[j*dim+k]-=myEqns[eIndex*dim+k]*vv;}
-			myValues[j]-=myValues[eIndex]*vv;
-		}
-	}
-	for(i=0;i<dim;i++){solutions[i]=myValues[index[i]];}
-	delete[] index;
-	delete[] myValues;
-	delete[] myEqns;
-	delete[] set;
-	return 1;
-}
diff --git a/Src/Factor.h b/Src/Factor.h
index d6ee4b2..3845e17 100644
--- a/Src/Factor.h
+++ b/Src/Factor.h
@@ -29,22 +29,126 @@ DAMAGE.
 #ifndef FACTOR_INCLUDED
 #define FACTOR_INCLUDED
 
-#define PI 3.1415926535897932384
+#include <math.h>
+#include <complex>
+#ifndef SQRT_3
 #define SQRT_3 1.7320508075688772935
+#endif // SQRT_3
+inline int Factor( double a1 , double a0 , std::complex< double > roots[1] , double EPS )
+{ // Solves a1*x + a0 = 0 and returns the number of roots written to `roots` (0 or 1).
+	if( fabs(a1)<=EPS ) return 0; // leading coefficient within EPS of zero: no root is reported
+	roots[0] = std::complex< double >( -a0/a1 , 0 );
+	return 1;
+}
+inline int Factor( double a2 , double a1 , double a0 , std::complex< double > roots[2] , double EPS )
+{ // Solves a2*x^2 + a1*x + a0 = 0 and returns the number of roots written to `roots`.
+	double d;
+	if( fabs(a2)<=EPS ) return Factor( a1 , a0 , roots , EPS ); // leading coefficient within EPS of zero: defer to the linear solver
 
-double ArcTan2(double y,double x);
-double Angle(const double in[2]);
-void Sqrt(const double in[2],double out[2]);
-void Add(const double in1[2],const double in2[2],double out[2]);
-void Subtract(const double in1[2],const double in2[2],double out[2]);
-void Multiply(const double in1[2],const double in2[2],double out[2]);
-void Divide(const double in1[2],const double in2[2],double out[2]);
+	d = a1*a1 - 4*a0*a2; // discriminant, computed before a1 is rescaled on the next line
+	a1 /= (2*a2); // from here on a1 holds a1/(2*a2)
+	if( d<0 )
+	{
+		d=sqrt(-d)/(2*a2); // negative discriminant: complex-conjugate pair with imaginary part +/- d
+		roots[0] = std::complex< double >( -a1 , -d );
+		roots[1] = std::complex< double >( -a1 ,  d );
+	}
+	else
+	{
+		d = sqrt(d)/(2*a2); // non-negative discriminant: two real roots
+		roots[0] = std::complex< double >( -a1-d , 0 );
+		roots[1] = std::complex< double >( -a1+d , 0 );
+	}
+	return 2;
+}
+// Solution taken from: http://mathworld.wolfram.com/CubicFormula.html
+// and http://www.csit.fsu.edu/~burkardt/f_src/subpak/subpak.f90
+inline int Factor( double a3 , double a2 , double a1 , double a0 , std::complex< double > roots[3] , double EPS )
+{ // Solves a3*x^3 + a2*x^2 + a1*x + a0 = 0 and returns the number of roots written to `roots`.
+	double q,r,r2,q3;
 
-int Factor(double a1,double a0,double roots[1][2],double EPS);
-int Factor(double a2,double a1,double a0,double roots[2][2],double EPS);
-int Factor(double a3,double a2,double a1,double a0,double roots[3][2],double EPS);
-int Factor(double a4,double a3,double a2,double a1,double a0,double roots[4][2],double EPS);
+	if( fabs(a3)<=EPS ) return Factor( a2 , a1 , a0 , roots , EPS ); // leading coefficient within EPS of zero: defer to the quadratic solver
+	a2 /= a3 , a1 /= a3 , a0 /= a3; // normalize to a monic cubic
 
-int Solve(const double* eqns,const double* values,double* solutions, int dim);
+	q = -(3*a1-a2*a2)/9;
+	r = -(9*a2*a1-27*a0-2*a2*a2*a2)/54;
+	r2 = r*r;
+	q3 = q*q*q;
 
+	if(r2<q3) // this branch writes three roots with zero imaginary part (q>0 here because q^3 > r^2 >= 0)
+	{
+		double sqrQ = sqrt(q);
+		double theta = acos ( r / (sqrQ*q) );
+		double cTheta=cos(theta/3)*sqrQ;
+		double sTheta=sin(theta/3)*sqrQ*SQRT_3/2;
+		roots[0] = std::complex< double >( -2*cTheta , 0 );
+		roots[1] = std::complex< double >( -2*(-cTheta*0.5-sTheta) , 0 );
+		roots[2] = std::complex< double >( -2*(-cTheta*0.5+sTheta) , 0 );
+	}
+	else // this branch writes one real root and a complex-conjugate pair
+	{
+		double t , s1 , s2 , sqr=sqrt(r2-q3);
+		t = -r+sqr;
+		if(t<0) s1 = -pow( -t , 1.0/3 ); // signed real cube root: pow is only applied to a non-negative base
+		else    s1 =  pow(  t , 1.0/3 );
+		t = -r-sqr;
+		if( t<0 ) s2 = -pow( -t , 1.0/3 );
+		else      s2 =  pow(  t , 1.0/3 );
+		roots[0] = std::complex< double >( s1+s2 , 0 );
+		s1 /= 2 , s2 /= 2; // halved before forming the conjugate pair
+		roots[1] = std::complex< double >( -s1-s2 ,  SQRT_3*(s1-s2) );
+		roots[2] = std::complex< double >( -s1-s2 , -SQRT_3*(s1-s2) );
+	}
+	roots[0] -= a2/3; // every root is shifted by -a2/3 (a2 already divided by a3)
+	roots[1] -= a2/3;
+	roots[2] -= a2/3;
+	return 3;
+}
+// Solution taken from: http://mathworld.wolfram.com/QuarticEquation.html
+// and http://www.csit.fsu.edu/~burkardt/f_src/subpak/subpak.f90
+inline int Factor( double a4 , double a3 , double a2 , double a1 , double a0 , std::complex< double > roots[4] , double EPS )
+{ // Solves a4*x^4 + a3*x^3 + a2*x^2 + a1*x + a0 = 0 and returns the number of roots written to `roots`.
+	std::complex< double > R , D , E , R2;
+
+	if( fabs(a4)<=EPS ) return Factor( a3 , a2 , a1 , a0 , roots , EPS ); // `<=` like the lower-degree overloads, so a4==0 with EPS==0 defers to the cubic instead of dividing by zero
+	a3 /= a4 , a2 /= a4 , a1 /= a4 , a0 /= a4; // normalize to a monic quartic
+
+	Factor( 1.0 , -a2 , a3*a1-4.0*a0 , -a3*a3*a0+4.0*a2*a0-a1*a1 , roots , EPS ); // resolvent cubic; only the real part of its first root is used below
+
+	R2 = std::complex< double >( a3*a3/4.0-a2+roots[0].real() , 0 );
+	R = sqrt( R2 );
+	if( fabs( R.real() )>10e-8 ) // note: the literal 10e-8 equals 1e-7
+	{
+		std::complex< double > temp1 , temp2 , p1 , p2;
+
+		p1 = std::complex< double >( a3*a3*0.75-2.0*a2-R2.real() , 0 );
+
+		temp2 = std::complex< double >( (4.0*a3*a2-8.0*a1-a3*a3*a3)/4.0 , 0 );
+		p2 = temp2 / R;
+		temp1 = p1+p2;
+		temp2 = p1-p2;
+		D = sqrt( temp1 );
+		E = sqrt( temp2 );
+	}
+	else // the real part of R is (numerically) zero: R is reset to zero and D, E are built from the resolvent root and a0 instead
+	{
+		R = std::complex< double >( 0 , 0 );
+		std::complex< double > temp1 , temp2;
+		temp1 = std::complex< double >( roots[0].real()*roots[0].real()-4.0*a0 , 0 );
+		temp2 = sqrt( temp1 );
+
+		temp1 = std::complex< double >( a3*a3*0.75-2.0*a2+2.0*temp2.real() ,  2.0*temp2.imag() );
+		D = sqrt( temp1 );
+
+		temp1 = std::complex< double >( a3*a3*0.75-2.0*a2-2.0*temp2.real() , -2.0*temp2.imag() );
+		E = sqrt( temp1 );
+	}
+
+	roots[0] =  R/2. + D/2. - a3/4; // the four roots are +/-R/2 +/- (D or E)/2, each shifted by -a3/4
+	roots[1] =  R/2. - D/2. - a3/4;
+	roots[2] = -R/2. + E/2. - a3/4;
+	roots[3] = -R/2. - E/2. - a3/4;
+
+	return 4;
+}
 #endif // FACTOR_INCLUDED
\ No newline at end of file
diff --git a/Src/Geometry.cpp b/Src/Geometry.cpp
deleted file mode 100644
index e6d03d8..0000000
--- a/Src/Geometry.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-#include "Geometry.h"
-#include <stdio.h>
-#include <string.h>
-#ifdef _WIN32
-#include <io.h>
-#endif // _WIN32
-
-///////////////////
-// CoredMeshData //
-///////////////////
-
-TriangulationEdge::TriangulationEdge(void){pIndex[0]=pIndex[1]=tIndex[0]=tIndex[1]=-1;}
-TriangulationTriangle::TriangulationTriangle(void){eIndex[0]=eIndex[1]=eIndex[2]=-1;}
-
-///////////////////////////
-// BufferedReadWriteFile //
-///////////////////////////
-BufferedReadWriteFile::BufferedReadWriteFile( const char* fileName , const char* fileHeader , int bufferSize )
-{
-	_bufferIndex = 0;
-	_bufferSize = bufferSize;
-	if( fileName ) strcpy( _fileName , fileName ) , tempFile = false , _fp = fopen( _fileName , "w+b" );
-	else
-	{
-		if( fileHeader && strlen(fileHeader) ) sprintf( _fileName , "%sXXXXXX" , fileHeader );
-		else strcpy( _fileName , "XXXXXX" );
-#ifdef _WIN32
-		_mktemp( _fileName );
-		_fp = fopen( _fileName , "w+b" );
-#else // !_WIN32
-		_fp = fdopen( mkstemp( _fileName ) , "w+b" );
-#endif // _WIN32
-		tempFile = true;
-	}
-	if( !_fp ) fprintf( stderr , "[ERROR] Failed to open file: %s\n" , _fileName ) , exit( 0 );
-	_buffer = (char*) malloc( _bufferSize );
-}
-BufferedReadWriteFile::~BufferedReadWriteFile( void )
-{
-	free( _buffer );
-	fclose( _fp );
-	if( tempFile ) remove( _fileName );
-}
-void BufferedReadWriteFile::reset( void )
-{
-	if( _bufferIndex ) fwrite( _buffer , 1 , _bufferIndex , _fp );
-	_bufferIndex = 0;
-	fseek( _fp , 0 , SEEK_SET );
-	_bufferIndex = 0;
-	_bufferSize = fread( _buffer , 1 , _bufferSize , _fp );
-}
-bool BufferedReadWriteFile::write( const void* data , size_t size )
-{
-	if( !size ) return true;
-	char* _data = (char*) data;
-	size_t sz = _bufferSize - _bufferIndex;
-	while( sz<=size )
-	{
-		memcpy( _buffer+_bufferIndex , _data , sz );
-		fwrite( _buffer , 1 , _bufferSize , _fp );
-		_data += sz;
-		size -= sz;
-		_bufferIndex = 0;
-		sz = _bufferSize;
-	}
-	if( size )
-	{
-		memcpy( _buffer+_bufferIndex , _data , size );
-		_bufferIndex += size;
-	}
-	return true;
-}
-bool BufferedReadWriteFile::read( void* data , size_t size )
-{
-	if( !size ) return true;
-	char *_data = (char*) data;
-	size_t sz = _bufferSize - _bufferIndex;
-	while( sz<=size )
-	{
-		if( size && !_bufferSize ) return false;
-		memcpy( _data , _buffer+_bufferIndex , sz );
-		_bufferSize = fread( _buffer , 1 , _bufferSize , _fp );
-		_data += sz;
-		size -= sz;
-		_bufferIndex = 0;
-		if( !size ) return true;
-		sz = _bufferSize;
-	}
-	if( size )
-	{
-		if( !_bufferSize ) return false;
-		memcpy( _data , _buffer+_bufferIndex , size );
-		_bufferIndex += size;
-	}
-	return true;
-}
\ No newline at end of file
diff --git a/Src/Geometry.h b/Src/Geometry.h
index ea8c951..7faf508 100644
--- a/Src/Geometry.h
+++ b/Src/Geometry.h
@@ -34,163 +34,240 @@ DAMAGE.
 #include <vector>
 #include <stdlib.h>
 #include <unordered_map>
+#include <string.h>
+#ifdef _WIN32
+#include <io.h>
+#endif // _WIN32
 
-template<class Real>
-Real Random(void);
+template< class Real > Real Random( void );
 
-template< class Real >
-struct Point3D
-{
-	Real coords[3];
-	Point3D( void ) { coords[0] = coords[1] = coords[2] = Real(0); }
-	Point3D( Real v ) { coords[0] = coords[1] = coords[2] = v; }
-	template< class _Real > Point3D( _Real v0 , _Real v1 , _Real v2 ){ coords[0] = Real(v0) , coords[1] = Real(v1) , coords[2] = Real(v2); }
-	template< class _Real > Point3D( const Point3D< _Real >& p ){ coords[0] = Real( p[0] ) , coords[1] = Real( p[1] ) , coords[2] = Real( p[2] ); }
-	inline       Real& operator[] ( int i )       { return coords[i]; }
-	inline const Real& operator[] ( int i ) const { return coords[i]; }
-	inline Point3D  operator - ( void ) const { Point3D q ; q.coords[0] = -coords[0] , q.coords[1] = -coords[1] , q.coords[2] = -coords[2] ; return q; }
-
-	template< class _Real > inline Point3D& operator += ( Point3D< _Real > p ){ coords[0] += Real(p.coords[0]) , coords[1] += Real(p.coords[1]) , coords[2] += Real(p.coords[2]) ; return *this; }
-	template< class _Real > inline Point3D  operator +  ( Point3D< _Real > p ) const { Point3D q ; q.coords[0] = coords[0] + Real(p.coords[0]) , q.coords[1] = coords[1] + Real(p.coords[1]) , q.coords[2] = coords[2] + Real(p.coords[2]) ; return q; }
-	template< class _Real > inline Point3D& operator *= ( _Real r ) { coords[0] *= Real(r) , coords[1] *= Real(r) , coords[2] *= Real(r) ; return *this; }
-	template< class _Real > inline Point3D  operator *  ( _Real r ) const { Point3D q ; q.coords[0] = coords[0] * Real(r) , q.coords[1] = coords[1] * Real(r) , q.coords[2] = coords[2] * Real(r) ; return q; }
-
-	template< class _Real > inline Point3D& operator -= ( Point3D< _Real > p ){ return ( (*this)+=(-p) ); }
-	template< class _Real > inline Point3D  operator -  ( Point3D< _Real > p ) const { return (*this)+(-p); }
-	template< class _Real > inline Point3D& operator /= ( _Real r ){ return ( (*this)*=Real(1./r) ); }
-	template< class _Real > inline Point3D  operator /  ( _Real r ) const { return (*this) * ( Real(1.)/r ); }
+template< class Real , unsigned int Dim > struct XForm;
 
-	static Real Dot( const Point3D< Real >& p1 , const Point3D< Real >& p2 ){ return p1.coords[0]*p2.coords[0] + p1.coords[1]*p2.coords[1] + p1.coords[2]*p2.coords[2]; }
-	template< class Real1 , class Real2 >
-	static Real Dot( const Point3D< Real1 >& p1 , const Point3D< Real2 >& p2 ){ return Real( p1.coords[0]*p2.coords[0] + p1.coords[1]*p2.coords[1] + p1.coords[2]*p2.coords[2] ); }
-};
-
-template< class Real >
-struct XForm3x3
+template< class Real , unsigned int Dim >
+struct Point	// Fixed-size vector holding Dim coordinates of type Real
 {
-	Real coords[3][3];
-	XForm3x3( void ) { for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ )  coords[i][j] = Real(0.); }
-	static XForm3x3 Identity( void )
-	{
-		XForm3x3 xForm;
-		xForm(0,0) = xForm(1,1) = xForm(2,2) = Real(1.);
-		return xForm;
-	}
-	Real& operator() ( int i , int j ){ return coords[i][j]; }
-	const Real& operator() ( int i , int j ) const { return coords[i][j]; }
-	template< class _Real > Point3D< _Real > operator * ( const Point3D< _Real >& p ) const
+	void _init( int d )	// Terminal overload of the variadic initializer (empty value pack)
 	{
-		Point3D< _Real > q;
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) q[i] += _Real( coords[j][i] * p[j] );
-		return q;
+		if( !d ) memset( coords , 0 , sizeof(Real)*Dim );	// Reached with d==0 only when no values were passed: zero all coordinates
+		else ERROR_OUT( "Should never be called" );
 	}
-	XForm3x3 operator * ( const XForm3x3& m ) const
+	template< class _Real , class ... _Reals > void _init( int d , _Real v , _Reals ... values )	// Stores v (cast to Real) in coords[d], then handles the remaining values
 	{
-		XForm3x3 n;
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) n.coords[i][j] += m.coords[i][k]*coords[k][j];
-		return n;
+		coords[d] = (Real)v;
+		if( d+1<Dim ) _init( d+1 , values... );	// Stop after the last coordinate so the terminal overload is not entered with d!=0
 	}
-	XForm3x3 transpose( void ) const
+	template< class ... Points >
+	static void _AddColumnVector( XForm< Real , Dim >& x , int c , Point point , Points ... points )	// Copies point into x( c , * ) and recurses on the remaining points with c+1
 	{
-		XForm3x3 xForm;
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) xForm( i , j ) = coords[j][i];
-		return xForm;
+		for( int r=0 ; r<Dim ; r++ ) x( c , r ) = point[r];
+		_AddColumnVector( x , c+1 , points ... );
 	}
-	Real subDeterminant( int i , int j ) const
+	static void _AddColumnVector( XForm< Real , Dim >& x , int c ){ ; }	// End of recursion: nothing left to copy
+public:
+	Real coords[Dim];
+	Point( void ) { memset( coords , 0 , sizeof(Real)*Dim ); }	// All coordinates start at zero (byte-wise)
+	Point( const Point& p ){ memcpy( coords , p.coords , sizeof(Real)*Dim ); }
+	template< class ... _Reals > Point( _Reals ... values ){ static_assert( sizeof...(values)==Dim || sizeof...(values)==0 , "[ERROR] Point::Point: Invalid number of coefficients" ) ; _init( 0 , values... ); }	// Accepts either no values or exactly Dim values
+	template< class _Real > Point( const Point< _Real , Dim >& p ){ for( int d=0 ; d<Dim ; d++ ) coords[d] = (Real) p.coords[d]; }	// Converts from a point with a different scalar type
+	inline       Real& operator[] ( int i )       { return coords[i]; }	// No bounds checking
+	inline const Real& operator[] ( int i ) const { return coords[i]; }
+	inline Point  operator - ( void ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = - coords[d] ; return q; }
+
+
+	template< class _Real > inline Point& operator += ( Point< _Real , Dim > p )       { for( int d=0 ; d<Dim ; d++ ) coords[d] += (Real)p.coords[d] ; return *this; }
+	template< class _Real > inline Point  operator +  ( Point< _Real , Dim > p ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = coords[d] + (Real)p.coords[d] ; return q; }
+	template< class _Real > inline Point& operator -= ( Point< _Real , Dim > p )       { return (*this)+=(-p); }	// Subtraction is addition of the negated point
+	template< class _Real > inline Point  operator -  ( Point< _Real , Dim > p ) const { return (*this)+ (-p); }
+	template< class Scalar > inline Point& operator *= ( Scalar r )       { for( int d=0 ; d<Dim ; d++ ) coords[d] *= r ; return *this; }	// Scalar operators apply r to every coordinate
+	template< class Scalar > inline Point  operator *  ( Scalar r ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = coords[d] * r ; return q; }
+	template< class Scalar > inline Point& operator /= ( Scalar r )       { for( int d=0 ; d<Dim ; d++ ) coords[d] /= r ; return *this; }
+	template< class Scalar > inline Point  operator /  ( Scalar r ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = coords[d] / r ; return q; }
+	template< class _Real > inline Point& operator *= ( Point< _Real , Dim > p )       { for( int d=0 ; d<Dim ; d++ ) coords[d] *= p.coords[d] ; return *this; }	// Point-by-point operators act coordinate-wise
+	template< class _Real > inline Point  operator *  ( Point< _Real , Dim > p ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = coords[d] * p.coords[d] ; return q; }
+	template< class _Real > inline Point& operator /= ( Point< _Real , Dim > p )       { for( int d=0 ; d<Dim ; d++ ) coords[d] /= p.coords[d] ; return *this; }
+	template< class _Real > inline Point  operator /  ( Point< _Real , Dim > p ) const { Point q ; for( int d=0 ; d<Dim ; d++ ) q.coords[d] = coords[d] / p.coords[d] ; return q; }
+
+	static Real Dot( const Point& p1 , const Point& p2 ){ Real dot = {} ; for( int d=0 ; d<Dim ; d++ ) dot += p1.coords[d] * p2.coords[d] ; return dot; }	// Sum of coordinate products
+	static Real SquareNorm( const Point& p ){ return Dot( p , p ); }
+	template< class ... Points > static Point CrossProduct( Points ... points )	// Generalized cross product of Dim-1 points, built from signed sub-determinants
 	{
-		int i1 = (i+1)%3 , i2 = (i+2)%3;
-		int j1 = (j+1)%3 , j2 = (j+2)%3;
-		return coords[i1][j1] * coords[i2][j2] - coords[i1][j2] * coords[i2][j1];
+		static_assert( sizeof ... ( points )==Dim-1 , "Number of points in cross-product must be one less than the dimension" );
+		XForm< Real , Dim > x;	// Entries x( Dim-1 , * ) are never written and keep the constructor's zeros
+		_AddColumnVector( x , 0 , points ... );
+		Point p;
+		for( int d=0 ; d<Dim ; d++ ) p[d] = ( d&1 ) ? -x.subDeterminant( Dim-1 , d ) : x.subDeterminant( Dim-1 , d );	// Sign alternates with d: negative for odd d
+		return p;
 	}
-	Real determinant( void ) const { return coords[0][0] * subDeterminant( 0 , 0 ) + coords[1][0] * subDeterminant( 1 , 0 ) + coords[2][0] * subDeterminant( 2 , 0 ); }
-	XForm3x3 inverse( void ) const
+	static Point CrossProduct( const Point* points )	// Same computation, reading points[0] .. points[Dim-2] from an array
 	{
-		XForm3x3 xForm;
-		Real d = determinant();
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ;j++ ) xForm.coords[j][i] =  subDeterminant( i , j ) / d;
-		return xForm;
+		XForm< Real , Dim > x;
+		for( int d=0 ; d<Dim-1 ; d++ ) for( int c=0 ; c<Dim ; c++ ) x(d,c) = points[d][c];
+		Point p;
+		for( int d=0 ; d<Dim ; d++ ) p[d] = ( d&1 ) ? -x.subDeterminant( Dim-1 , d ) : x.subDeterminant( Dim-1 , d );
+		return p;
 	}
+	static Point CrossProduct( Point* points ){ return CrossProduct( ( const Point* )points ); }	// Non-const pointer overload forwarding to the const version
 };
+template< class Real , unsigned int Dim > Point< Real , Dim > operator * ( Real r , Point< Real , Dim > p ){ return p*r; }	// Scalar-on-the-left product; forwards to p*r
+template< class Real , unsigned int Dim > Point< Real , Dim > operator / ( Real r , Point< Real , Dim > p ){ return p/r; }	// NOTE(review): evaluates p/r (every coordinate divided by r), not r divided by each coordinate -- confirm this is intended
 
-template< class Real >
-struct XForm4x4
+template< class Real , unsigned int _Columns , unsigned int _Rows >
+struct Matrix	// Dense matrix of Real values, stored and indexed as coords[column][row]
 {
-	Real coords[4][4];
-	XForm4x4( void ) { for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ; j++ )  coords[i][j] = Real(0.); }
-	static XForm4x4 Identity( void )
+	static const unsigned int Columns = _Columns;
+	static const unsigned int Rows = _Rows;
+	Real coords[Columns][Rows];
+	Matrix( void ) { memset( coords , 0 , sizeof(coords) ); }	// All entries start at zero (byte-wise)
+	inline       Real& operator() ( int c , int r )       { return coords[c][r]; }	// Entry access, column index first; no bounds checking
+	inline const Real& operator() ( int c , int r ) const { return coords[c][r]; }
+	inline       Real* operator[] ( int c         )       { return coords[c]   ; }	// Pointer to the Rows entries of column c
+	inline const Real* operator[] ( int c         ) const { return coords[c]   ; }
+
+	inline Matrix  operator - ( void ) const { Matrix m ; for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) m.coords[c][r] = - coords[c][r] ; return m; }
+
+	inline Matrix& operator += ( const Matrix& m ){ for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) coords[c][r] += m.coords[c][r] ; return *this; }
+	inline Matrix  operator +  ( const Matrix& m ) const { Matrix n ; for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) n.coords[c][r] = coords[c][r] + m.coords[c][r] ; return n; }
+	inline Matrix& operator *= ( Real s ) { for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) coords[c][r] *= s ; return *this; }
+	inline Matrix  operator *  ( Real s ) const { Matrix n ; for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) n.coords[c][r] = coords[c][r] * s ; return n; }
+
+	inline Matrix& operator -= ( const Matrix& m ){ return ( (*this)+=(-m) ); }	// Subtraction is addition of the negated matrix
+	inline Matrix  operator -  ( const Matrix& m ) const { return (*this)+(-m); }
+	inline Matrix& operator /= ( Real s ){ return ( (*this)*=(Real)(1./s) ); }	// Division multiplies by the reciprocal, which is computed in double and cast to Real
+	inline Matrix  operator /  ( Real s ) const { return (*this) * ( (Real)(1./s) ); }
+
+	static Real Dot( const Matrix& m1 , const Matrix& m2 ){ Real dot = (Real)0 ; for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) dot += m1.coords[c][r] * m2.coords[c][r] ; return dot; }	// Sum of entry-wise products
+
+	template< typename T >
+	inline Point< T , Rows > operator* ( const Point< T , Columns >& p ) const { Point< T , Rows > q ; for( int c=0 ; c<Columns ; c++ ) for( int r=0 ; r<Rows ; r++ ) q[r] += (T)( p[c] * coords[c][r] ) ; return q; }	// Matrix-vector product: q[r] accumulates p[c]*coords[c][r] over all columns c
+};
+
+template< class Real , unsigned int Dim >
+struct XForm	// Square Dim x Dim transformation of Real values
+{
+	Real coords[Dim][Dim];
+	XForm( void ) { memset( coords , 0 , sizeof(Real) * Dim * Dim ); }	// All entries start at zero (byte-wise)
+	static XForm Identity( void )
 	{
-		XForm4x4 xForm;
-		xForm(0,0) = xForm(1,1) = xForm(2,2) = xForm(3,3) = Real(1.);
+		XForm xForm;
+		for( int d=0 ; d<Dim ; d++ ) xForm(d,d) = (Real)1.;	// Ones on the diagonal; everything else keeps the constructor's zeros
 		return xForm;
 	}
 	Real& operator() ( int i , int j ){ return coords[i][j]; }
 	const Real& operator() ( int i , int j ) const { return coords[i][j]; }
-	template< class _Real > Point3D< _Real > operator * ( const Point3D< _Real >& p ) const
+	template< class _Real > Point< _Real , Dim-1 > operator * ( const Point< _Real , Dim-1 >& p ) const	// Applies the transform to a point with one fewer coordinate
 	{
-		Point3D< _Real > q;
-		for( int i=0 ; i<3 ; i++ )
+		Point< _Real , Dim-1 > q;
+		for( int i=0 ; i<Dim-1 ; i++ )
 		{
-			for( int j=0 ; j<3 ; j++ ) q[i] += (_Real)( coords[j][i] * p[j] );
-			q[i] += (_Real)coords[3][i];
+			for( int j=0 ; j<Dim-1 ; j++ ) q[i] += (_Real)( coords[j][i] * p[j] );
+			q[i] += (_Real)coords[Dim-1][i];	// The entries coords[Dim-1][*] are added as an offset
 		}
 		return q;
 	}
-	XForm4x4 operator * ( const XForm4x4& m ) const
+	template< class _Real > Point< _Real , Dim > operator * ( const Point< _Real , Dim >& p ) const	// Full product with a Dim-coordinate point (no offset term)
+	{
+		Point< _Real , Dim > q;
+		for( int i=0 ; i<Dim ; i++ ) for( int j=0 ; j<Dim ; j++ ) q[i] += (_Real)( coords[j][i] * p[j] );
+		return q;
+	}
+	XForm operator * ( const XForm& m ) const
 	{
-		XForm4x4 n;
-		for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ; j++ ) for( int k=0 ; k<4 ; k++ ) n.coords[i][j] += m.coords[i][k]*coords[k][j];
+		XForm n;
+		for( int i=0 ; i<Dim ; i++ ) for( int j=0 ; j<Dim ; j++ ) for( int k=0 ; k<Dim ; k++ ) n.coords[i][j] += m.coords[i][k]*coords[k][j];	// n(i,j) accumulates m(i,k)*this(k,j) over k
 		return n;
 	}
-	XForm4x4 transpose( void ) const
+	XForm transpose( void ) const
 	{
-		XForm4x4 xForm;
-		for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ; j++ ) xForm( i , j ) = coords[j][i];
+		XForm xForm;
+		for( int i=0 ; i<Dim ; i++ ) for( int j=0 ; j<Dim ; j++ ) xForm( i , j ) = coords[j][i];
 		return xForm;
 	}
-	Real subDeterminant( int i , int j ) const
+	Real determinant( void ) const	// Expansion over the entries coords[d][0] with alternating signs
 	{
-		XForm3x3< Real > xForm;
-		int ii[] = { (i+1)%4 , (i+2)%4 , (i+3)%4 } , jj[] = { (j+1)%4 , (j+2)%4 , (j+3)%4 };
-		for( int _i=0 ; _i<3 ; _i++ ) for( int _j=0 ; _j<3 ; _j++ ) xForm( _i , _j ) = coords[ ii[_i] ][ jj[_j] ];
-		return xForm.determinant();
+		Real det = (Real)0.;
+		for( int d=0 ; d<Dim ; d++ ) 
+			if( d&1 ) det -= coords[d][0] * subDeterminant( d , 0 );
+			else      det += coords[d][0] * subDeterminant( d , 0 );
+		return det;
 	}
-	Real determinant( void ) const { return coords[0][0] * subDeterminant( 0 , 0 ) - coords[1][0] * subDeterminant( 1 , 0 ) + coords[2][0] * subDeterminant( 2 , 0 ) - coords[3][0] * subDeterminant( 3 , 0 ); }
-	XForm4x4 inverse( void ) const
+	XForm inverse( void ) const	// Signed sub-determinants divided by the determinant; a zero determinant is not checked for
 	{
-		XForm4x4 xForm;
+		XForm xForm;
 		Real d = determinant();
-		for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ;j++ )
+		for( int i=0 ; i<Dim ; i++ ) for( int j=0 ; j<Dim ; j++ )
 			if( (i+j)%2==0 ) xForm.coords[j][i] =  subDeterminant( i , j ) / d;
 			else             xForm.coords[j][i] = -subDeterminant( i , j ) / d;
 		return xForm;
 	}
-};
+	Real subDeterminant( int i , int j ) const	// Determinant of the (Dim-1)x(Dim-1) transform left after dropping first index i and second index j
+	{
+		XForm< Real , Dim-1 > xForm;
+		int ii[Dim-1] , jj[Dim-1];
+		for( int a=0 , _i=0 , _j=0 ; a<Dim ; a++ )
+		{
+			if( a!=i ) ii[_i++] = a;	// ii / jj list the surviving indices in increasing order
+			if( a!=j ) jj[_j++] = a;
+		}
+		for( int _i=0 ; _i<Dim-1 ; _i++ ) for( int _j=0 ; _j<Dim-1 ; _j++ ) xForm( _i , _j ) = coords[ ii[_i] ][ jj[_j] ];
+		return xForm.determinant();	// Recurses on the smaller transform; Dim==1 is specialized for float and double
+	}
 
-template< class Real >
-struct OrientedPoint3D
+	inline XForm  operator - ( void ) const { XForm m ; for( int c=0 ; c<Dim ; c++ ) for( int r=0 ; r<Dim ; r++ ) m.coords[c][r] = - coords[c][r] ; return m; }
+
+	inline XForm& operator += ( const XForm& m ){ for( int c=0 ; c<Dim ; c++ ) for( int r=0 ; r<Dim ; r++ ) coords[c][r] += m.coords[c][r] ; return *this; }
+	inline XForm  operator +  ( const XForm& m ) const { XForm n ; for( int c=0 ; c<Dim ; c++ ) for( int r=0 ; r<Dim ; r++ ) n.coords[c][r] = coords[c][r] + m.coords[c][r] ; return n; }
+	inline XForm& operator *= ( Real s ) { for( int c=0 ; c<Dim ; c++ ) for( int r=0 ; r<Dim ; r++ ) coords[c][r] *= s ; return *this; }
+	inline XForm  operator *  ( Real s ) const { XForm n ; for( int c=0 ; c<Dim ; c++ ) for( int r=0 ; r<Dim ; r++ ) n.coords[c][r] = coords[c][r] * s ; return n; }
+
+	inline XForm& operator -= ( const XForm& m ){ return ( (*this)+=(-m) ); }	// Subtraction is addition of the negated transform
+	inline XForm  operator -  ( const XForm& m ) const { return (*this)+(-m); }
+	inline XForm& operator /= ( Real s ){ return ( (*this)*=(Real)(1./s) ); }	// Division multiplies by the reciprocal, which is computed in double and cast to Real
+	inline XForm  operator /  ( Real s ) const { return (*this) * ( (Real)(1./s) ); }
+};
+template<>
+inline XForm< float , 1 > XForm< float , 1 >::inverse( void ) const
+{
+	XForm< float , 1 > inv;	// The inverse of a 1x1 transform is the reciprocal of its single entry
+	inv( 0 , 0 ) = (float)( 1. / (*this)( 0 , 0 ) );
+	return inv;
+}
+template<>
+inline XForm< double , 1 > XForm< double , 1 >::inverse( void ) const
+{
+	XForm< double , 1 > inv;	// Same as the float case, carried out in double precision
+	inv( 0 , 0 ) = (double)( 1. / (*this)( 0 , 0 ) );
+	return inv;
+}
+template<> inline float  XForm< float  , 1 >::determinant( void ) const { return (*this)( 0 , 0 ); }	// A 1x1 determinant is its only entry; this ends the subDeterminant recursion
+template<> inline double XForm< double , 1 >::determinant( void ) const { return (*this)( 0 , 0 ); }
+
+template< class Real , unsigned int Dim >
+struct OrientedPoint	// A position p paired with a second vector n; all arithmetic acts on both members
 {
-	Point3D< Real > p , n;
-	OrientedPoint3D( Point3D< Real > pp=Point3D< Real >() , Point3D< Real > nn=Point3D< Real >() ) : p(pp) , n(nn) { ; }
-	template< class _Real > OrientedPoint3D( const OrientedPoint3D< _Real >& p ) : OrientedPoint3D( Point3D< Real >( p.p ) , Point3D< Real >( p.n ) ){ ; }
-
-	template< class _Real > inline OrientedPoint3D& operator += ( OrientedPoint3D< _Real > _p ){ p += _p.p , n += _p.n ; return *this; }
-	template< class _Real > inline OrientedPoint3D  operator +  ( OrientedPoint3D< _Real > _p ) const { return OrientedPoint3D< Real >( p+_p.p , n+_p.n ); }
-	template< class _Real > inline OrientedPoint3D& operator *= ( _Real r ) { p *= r , n *= r ; return *this; }
-	template< class _Real > inline OrientedPoint3D  operator *  ( _Real r ) const { return OrientedPoint3D< Real >( p*r , n*r ); }
-
-	template< class _Real > inline OrientedPoint3D& operator -= ( OrientedPoint3D< _Real > p ){ return ( (*this)+=(-p) ); }
-	template< class _Real > inline OrientedPoint3D  operator -  ( OrientedPoint3D< _Real > p ) const { return (*this)+(-p); }
-	template< class _Real > inline OrientedPoint3D& operator /= ( _Real r ){ return ( (*this)*=Real(1./r) ); }
-	template< class _Real > inline OrientedPoint3D  operator /  ( _Real r ) const { return (*this) * ( Real(1.)/r ); }
+	Point< Real , Dim > p , n;
+	OrientedPoint( Point< Real , Dim > pp = Point< Real , Dim >() , Point< Real , Dim > nn=Point< Real , Dim >() ) : p(pp) , n(nn) { ; }	// Both members default to the zero point
+	template< class _Real > OrientedPoint( const OrientedPoint< _Real , Dim>& p ) : OrientedPoint( Point< Real , Dim >( p.p ) , Point< Real , Dim >( p.n ) ){ ; }	// Converts from a different scalar type
+
+	template< class _Real > inline OrientedPoint& operator += ( OrientedPoint< _Real , Dim > _p ){ p += _p.p , n += _p.n ; return *this; }
+	template< class _Real > inline OrientedPoint  operator +  ( OrientedPoint< _Real , Dim > _p ) const { return OrientedPoint< Real , Dim >( p+_p.p , n+_p.n ); }
+	template< class _Real > inline OrientedPoint& operator *= ( _Real r ) { p *= r , n *= r ; return *this; }
+	template< class _Real > inline OrientedPoint  operator *  ( _Real r ) const { return OrientedPoint< Real , Dim >( p*r , n*r ); }
+
+	template< class _Real > inline OrientedPoint& operator -= ( OrientedPoint< _Real , Dim > _p ){ p -= _p.p , n -= _p.n ; return *this; }	// Member-wise: OrientedPoint defines no unary minus, so "+= (-_p)" could not be instantiated
+	template< class _Real > inline OrientedPoint  operator -  ( OrientedPoint< _Real , Dim > _p ) const { return OrientedPoint< Real , Dim >( p-_p.p , n-_p.n ); }
+	template< class _Real > inline OrientedPoint& operator /= ( _Real r ){ return ( (*this)*=Real(1./r) ); }	// Division multiplies by the reciprocal of r
+	template< class _Real > inline OrientedPoint  operator /  ( _Real r ) const { return (*this) * ( Real(1.)/r ); }
 };
 
+
 template< class Data , class Real >
 struct ProjectiveData
 {
 	Data data;
 	Real weight;
-	ProjectiveData( Data d=Data(0) , Real w=Real(0) ) : data(d) , weight(w) { ; }
+	ProjectiveData( Data d=Data() , Real w=(Real)0 ) : data(d) , weight(w) { ; }
 	operator Data (){ return weight!=0 ? data/weight : data*weight; }
+	Data value( void ) const { return weight!=0 ? data/weight : data*weight; }
 	ProjectiveData& operator += ( const ProjectiveData& p ){ data += p.data , weight += p.weight ; return *this; }
 	ProjectiveData& operator -= ( const ProjectiveData& p ){ data -= p.data , weight -= p.weight ; return *this; }
 	ProjectiveData& operator *= ( Real s ){ data *= s , weight *= s ; return *this; }
@@ -201,65 +278,94 @@ struct ProjectiveData
 	ProjectiveData  operator /  ( Real s ) const { return ProjectiveData( data/s , weight/s ); }
 };
 
-template<class Real>
-Point3D<Real> RandomBallPoint(void);
-
-template<class Real>
-Point3D<Real> RandomSpherePoint(void);
-
-template<class Real>
-double Length(const Point3D<Real>& p);
-
-template<class Real>
-double SquareLength(const Point3D<Real>& p);
+template< class Real , unsigned int Dim > Point< Real , Dim > RandomBallPoint( void );	// Declared here only; no definition appears in this part of the header
+template< class Real , unsigned int Dim > Point< Real , Dim > RandomSpherePoint( void );
+template< class Real , unsigned int Dim > Real Length( Point< Real , Dim > p ){ return (Real)sqrt( Point< Real , Dim >::SquareNorm( p ) ); }	// Euclidean norm of p
+template< class Real , unsigned int Dim > Real SquareLength( Point< Real , Dim > p ){ return Point< Real , Dim >::SquareNorm( p ); }	// Squared norm; avoids the square root
+template< class Real , unsigned int Dim > Real Distance( Point< Real , Dim > p1 , Point< Real , Dim > p2 ){ return Length(p1-p2); }	// Norm of the difference p1-p2
+template< class Real , unsigned int Dim > Real SquareDistance( Point< Real , Dim > p1 , Point< Real , Dim > p2 ){ return SquareLength( p1-p2 ); }
+template< class Real > Point< Real , 3 > CrossProduct( Point< Real , 3 > p1 , Point< Real , 3 > p2 ){ return Point< Real , 3 >::CrossProduct( p1 , p2 ); }	// Three-dimensional convenience wrapper around Point::CrossProduct
 
-template<class Real>
-double Distance(const Point3D<Real>& p1,const Point3D<Real>& p2);
-
-template<class Real>
-double SquareDistance(const Point3D<Real>& p1,const Point3D<Real>& p2);
-
-template <class Real>
-void CrossProduct(const Point3D<Real>& p1,const Point3D<Real>& p2,Point3D<Real>& p);
-
-
-class Edge{
-public:
-	double p[2][2];
-	double Length(void) const{
-		double d[2];
-		d[0]=p[0][0]-p[1][0];
-		d[1]=p[0][1]-p[1][1];
-
-		return sqrt(d[0]*d[0]+d[1]*d[1]);
+template< class Real , unsigned int Dim > Real SquareArea( Point< Real , Dim > p1 , Point< Real , Dim > p2 , Point< Real , Dim > p3 )
+{
+	typedef Point< Real , Dim > Pt;
+	// With edge vectors e1 = p2-p1 and e2 = p3-p1, sin^2 = 1 - cos^2 and cos = < e1 , e2 > / ( |e1| * |e2| ) give
+	//     Area^2 = ( |e1|^2 * |e2|^2 * sin^2 ) / 4 = ( |e1|^2 * |e2|^2 - < e1 , e2 >^2 ) / 4
+	const Pt e1 = p2-p1 , e2 = p3-p1;
+	const Real inner = Pt::Dot( e1 , e2 );
+	const Real sqLen1 = Pt::SquareNorm( e1 );
+	const Real sqLen2 = Pt::SquareNorm( e2 );
+	return ( sqLen1 * sqLen2 - inner * inner ) / 4;
+}
+template< class Real , unsigned int Dim > Real Area( Point< Real , Dim > p1 , Point< Real , Dim > p2 , Point< Real , Dim > p3 ){ Real sqArea = SquareArea( p1 , p2 , p3 ) ; return (Real)sqrt( sqArea ); }	// Triangle area as the root of the squared area
+
+template< unsigned int K > struct Factorial{ static const unsigned long long Value = K * Factorial< K-1 >::Value; };	// K! = K * (K-1)!, evaluated at compile time
+template<> struct Factorial< 0 >{ static const unsigned long long Value = 1ull; };	// Base case of the recursion: 0! = 1
+
+template< class Real , unsigned int Dim , unsigned int K >
+struct Simplex	// K-dimensional simplex given by K+1 vertices in Dim-dimensional space
+{
+	Point< Real , Dim > p[K+1];
+	Simplex( void ){ static_assert( K<=Dim , "[ERROR] Bad simplex dimension" ); }	// The simplex dimension may not exceed the dimension of the space
+	Point< Real , Dim >& operator[]( int k ){ return p[k]; }
+	const Point< Real , Dim >& operator[]( int k ) const { return p[k]; }
+	Real measure( void ) const { return (Real)sqrt( squareMeasure() ); }
+	Real squareMeasure( void ) const	// Determinant of the matrix of edge dot products, divided by (K!)^2
+	{
+		XForm< Real , K > mass;
+		for( int i=1 ; i<=K ; i++ ) for( int j=1 ; j<=K ; j++ ) mass(i-1,j-1) = Point< Real , Dim >::Dot( p[i]-p[0] , p[j]-p[0] );	// Edges are taken relative to vertex 0
+		return mass.determinant() / ( Factorial< K >::Value * Factorial< K >::Value );
 	}
+	Point< Real , Dim > center( void ) const	// Average of the K+1 vertices
+	{
+		Point< Real , Dim > c;
+		for( int k=0 ; k<=K ; k++ ) c += p[k];
+		return c / (K+1);
+	}
+	void split( Point< Real , Dim > pNormal , Real pOffset , std::vector< Simplex >& back , std::vector< Simplex >& front ) const;	// Declared only; the definition is not in this part of the header
 };
-class Triangle{
-public:
-	double p[3][3];
-	double Area(void) const{
-		double v1[3] , v2[3] , v[3];
-		for( int d=0 ; d<3 ; d++ )
-		{
-			v1[d] = p[1][d] - p[0][d];
-			v2[d] = p[2][d] - p[0][d];
-		}
-		v[0] =  v1[1]*v2[2] - v1[2]*v2[1];
-		v[1] = -v1[0]*v2[2] + v1[2]*v2[0];
-		v[2] =  v1[0]*v2[1] - v1[1]*v2[0];
-		return sqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] ) / 2;
+template< class Real , unsigned int Dim >
+struct Simplex< Real , Dim , 0 >	// Partial specialization for K=0: a simplex that is a single point
+{
+	Point< Real , Dim > p[1];
+	Point< Real , Dim >& operator[]( int k ){ return p[k]; }
+	const Point< Real , Dim >& operator[]( int k ) const { return p[k]; }
+	Real squareMeasure( void ) const { return (Real)1.; }	// The measure of a single point is defined as 1
+	Real measure( void ) const { return (Real)1.; }
+	Point< Real , Dim > center( void ) const { return p[0]; }
+	void split( Point< Real , Dim > pNormal , Real pOffset , std::vector< Simplex >& back , std::vector< Simplex >& front ) const	// Appends this point to back or front depending on its side of the plane
+	{
+		if( Point< Real , Dim >::Dot( p[0] , pNormal ) < pOffset ) back.push_back( *this );	// Strictly below the offset goes to back
+		else                                                       front.push_back( *this );	// A point exactly on the plane goes to front
 	}
-	double AspectRatio(void) const{
-		double d=0;
-		int i,j;
-		for(i=0;i<3;i++){
-	  for(i=0;i<3;i++)
-			for(j=0;j<3;j++){d+=(p[(i+1)%3][j]-p[i][j])*(p[(i+1)%3][j]-p[i][j]);}
-		}
-		return Area()/d;
+};
+template< class Real , unsigned int Dim > using Edge = Simplex< Real , Dim , 1 >;	// An edge is a 1-simplex (two vertices)
+template< class Real , unsigned int Dim > using Triangle = Simplex< Real , Dim , 2 >;	// A triangle is a 2-simplex (three vertices)
+
+template< unsigned int K >
+struct SimplexIndex	// The K+1 integer vertex indices of a K-dimensional simplex
+{
+	int idx[K+1];
+	template< class ... Ints >
+	SimplexIndex( Ints ... values ){ static_assert( sizeof...(values)==K+1 || sizeof...(values)==0 , "[ERROR] Invalid number of coefficients" ) ; _init( 0 , values ... ); }	// Accepts either no values (all indices zero) or exactly K+1 values
+	SimplexIndex( int i0 , int i1 , int i2 ){ idx[0] = i0 , idx[1] = i1 , idx[2] = i2; }	// NOTE(review): writes idx[2], so this overload is only valid when K>=2
+	int& operator[] ( int i ) { return idx[i] ;}	// No bounds checking
+	const int& operator[] ( int i ) const { return idx[i]; }
+protected:
+	void _init( int k )	// Terminal overload of the variadic initializer (empty value pack)
+	{
+		if( !k ) memset( idx , 0 , sizeof(idx) );	// Reached with k==0 only when no values were passed: zero all indices
+		else ERROR_OUT( "Should never be called" );
+	}
+	template< class ... Ints > void _init( int k , int v , Ints ... values )	// Stores v in idx[k], then handles the remaining values
+	{
+		idx[k] = v;
+		if( k+1<=K ) _init( k+1 , values ... );	// Stop after idx[K]; recursing once more would enter the terminal overload with k!=0 and report an error
 	}
-	
 };
+typedef SimplexIndex< 1 > EdgeIndex;	// Two vertex indices
+typedef SimplexIndex< 2 > TriangleIndex;	// Three vertex indices
+
 class CoredPointIndex
 {
 public:
@@ -269,24 +375,12 @@ class CoredPointIndex
 	int operator == (const CoredPointIndex& cpi) const {return (index==cpi.index) && (inCore==cpi.inCore);};
 	int operator != (const CoredPointIndex& cpi) const {return (index!=cpi.index) || (inCore!=cpi.inCore);};
 };
-class EdgeIndex{
-public:
-	int idx[2];
-};
-class CoredEdgeIndex
-{
-public:
-	CoredPointIndex idx[2];
-};
-class TriangleIndex{
-public:
-	int idx[3];
-};
+struct CoredEdgeIndex{ CoredPointIndex idx[2]; };	// An edge stored as two CoredPointIndex endpoints
 
 class TriangulationEdge
 {
 public:
-	TriangulationEdge(void);
+	TriangulationEdge( void ){ pIndex[0] = pIndex[1] = tIndex[0] = tIndex[1] = -1; }	// Every index starts at -1
 	int pIndex[2];
 	int tIndex[2];
 };
@@ -294,54 +388,66 @@ class TriangulationEdge
 class TriangulationTriangle
 {
 public:
-	TriangulationTriangle(void);
+	TriangulationTriangle( void ){ eIndex[0] = eIndex[1] = eIndex[2] = -1; }	// Every index starts at -1
 	int eIndex[3];
 };
 
-template<class Real>
+template< class Real , unsigned int Dim >
 class Triangulation
 {
 public:
-
-	std::vector<Point3D<Real> >		points;
-	std::vector<TriangulationEdge>				edges;
-	std::vector<TriangulationTriangle>			triangles;
-
-	int factor( int tIndex,int& p1,int& p2,int& p3);
-	double area(void);
-	double area( int tIndex );
-	double area( int p1 , int p2 , int p3 );
-	int flipMinimize( int eIndex);
+	std::vector< Point< Real , Dim > > points;
+	std::vector< TriangulationEdge > edges;
+	std::vector< TriangulationTriangle > triangles;
+
+	int factor( int tIndex , int& p1 , int& p2 , int& p3 ) const;
+	Real area( void ) const;
+	Real area( int tIndex ) const ;
+	Real area( int p1 , int p2 , int p3 ) const;
+	int flipMinimize( int eIndex );
 	int addTriangle( int p1 , int p2 , int p3 );
 
 protected:
-	std::unordered_map<long long, int> edgeMap;
+	std::unordered_map< long long , int > edgeMap;
 	static long long EdgeIndex( int p1 , int p2 );
-	double area(const Triangle& t);
+	Real area( const Triangle< Real , Dim >& t ) const;
 };
 
-
-template<class Real>
-void EdgeCollapse(const Real& edgeRatio,std::vector<TriangleIndex>& triangles,std::vector< Point3D<Real> >& positions,std::vector<Point3D<Real> >* normals);
-template<class Real>
-void TriangleCollapse(const Real& edgeRatio,std::vector<TriangleIndex>& triangles,std::vector<Point3D<Real> >& positions,std::vector<Point3D<Real> >* normals);
-
 struct CoredVertexIndex
 {
 	int idx;
 	bool inCore;
 };
 template< class Vertex >
+class CoredCurveData	// Abstract interface for curve data: an in-core point vector plus out-of-core points and edges supplied by subclasses
+{
+public:
+	virtual ~CoredCurveData( void ){}	// Virtual so that an implementation can be destroyed through a base-class pointer
+	std::vector< Vertex > inCorePoints;
+	virtual void resetIterator( void ) = 0;
+	virtual int addOutOfCorePoint( const Vertex& p ) = 0;
+	virtual int addOutOfCorePoint_s( const Vertex& p ) = 0;
+	virtual void addEdge_s( CoredVertexIndex v1 , CoredVertexIndex v2 ) = 0;
+	virtual void addEdge_s( int v1 , int v2 ) = 0;
+
+	virtual int nextOutOfCorePoint( Vertex& p )=0;
+	virtual int nextEdge( CoredVertexIndex& v1 , CoredVertexIndex& v2 ) = 0;
+
+	virtual int outOfCorePointCount(void)=0;
+	virtual int edgeCount( void ) = 0;
+};
+template< class Vertex >
 class CoredMeshData
 {
 public:
+	virtual ~CoredMeshData( void ){}
 	std::vector< Vertex > inCorePoints;
 	virtual void resetIterator( void ) = 0;
 
 	virtual int addOutOfCorePoint( const Vertex& p ) = 0;
 	virtual int addOutOfCorePoint_s( const Vertex& p ) = 0;
-	virtual int addPolygon_s( const std::vector< CoredVertexIndex >& vertices ) = 0;
-	virtual int addPolygon_s( const std::vector< int >& vertices ) = 0;
+	virtual void addPolygon_s( const std::vector< CoredVertexIndex >& vertices ) = 0;
+	virtual void addPolygon_s( const std::vector< int >& vertices ) = 0;
 
 	virtual int nextOutOfCorePoint( Vertex& p )=0;
 	virtual int nextPolygon( std::vector< CoredVertexIndex >& vertices ) = 0;
@@ -350,11 +456,36 @@ class CoredMeshData
 	virtual int polygonCount( void ) = 0;
 };
 
+template< class Vertex >
+class CoredVectorCurveData : public CoredCurveData< Vertex >	// CoredCurveData implementation that keeps its points and edges in std::vector members
+{
+	std::vector< Vertex > oocPoints;
+	std::vector< std::pair< int , int > > edges;	// Each edge is stored as a pair of ints
+	int threadIndex;
+	int edgeIndex;	// NOTE(review): presumably the read cursor used by nextEdge -- the definition is not in this part of the header
+	int oocPointIndex;	// NOTE(review): presumably the read cursor used by nextOutOfCorePoint -- confirm against the definition
+public:
+	CoredVectorCurveData(void);
+
+	void resetIterator(void);
+
+	int addOutOfCorePoint( const Vertex& p );
+	int addOutOfCorePoint_s( const Vertex& p );
+	void addEdge_s( CoredVertexIndex v1 , CoredVertexIndex v2 );
+	void addEdge_s( int v1 , int v2 );
+
+	int nextOutOfCorePoint( Vertex& p );
+	int nextEdge( CoredVertexIndex& v1 , CoredVertexIndex& v2 );
+
+	int outOfCorePointCount(void);
+	int edgeCount( void );
+};
 template< class Vertex >
 class CoredVectorMeshData : public CoredMeshData< Vertex >
 {
 	std::vector< Vertex > oocPoints;
-	std::vector< std::vector< int > > polygons;
+	std::vector< std::vector< std::vector< int > > > polygons;
+	int threadIndex;
 	int polygonIndex;
 	int oocPointIndex;
 public:
@@ -364,8 +495,8 @@ class CoredVectorMeshData : public CoredMeshData< Vertex >
 
 	int addOutOfCorePoint( const Vertex& p );
 	int addOutOfCorePoint_s( const Vertex& p );
-	int addPolygon_s( const std::vector< CoredVertexIndex >& vertices );
-	int addPolygon_s( const std::vector< int >& vertices );
+	void addPolygon_s( const std::vector< CoredVertexIndex >& vertices );
+	void addPolygon_s( const std::vector< int >& vertices );
 
 	int nextOutOfCorePoint( Vertex& p );
 	int nextPolygon( std::vector< CoredVertexIndex >& vertices );
@@ -380,18 +511,120 @@ class BufferedReadWriteFile
 	char *_buffer , _fileName[1024];
 	size_t _bufferIndex , _bufferSize;
 public:
-	BufferedReadWriteFile( const char* fileName=NULL , const char* fileHeader="" , int bufferSize=(1<<20) );
-	~BufferedReadWriteFile( void );
-	bool write( const void* data , size_t size );
-	bool read ( void* data , size_t size );
-	void reset( void );
+	// Opens fileName, or when it is NULL a fresh temporary file named "<fileHeader>XXXXXX", in "w+b" mode and allocates the buffer.
+	BufferedReadWriteFile( const char* fileName=NULL , const char* fileHeader="" , int bufferSize=(1<<20) )
+	{
+		_bufferIndex = 0 , _bufferSize = bufferSize;
+		if( fileName ) snprintf( _fileName , sizeof(_fileName) , "%s" , fileName ) , tempFile = false , _fp = fopen( _fileName , "w+b" );	// bounded copy: an over-long name is truncated, never overflows _fileName
+		else
+		{
+			// Cap the prefix so that the six X's overwritten by _mktemp/mkstemp always fit inside _fileName
+			snprintf( _fileName , sizeof(_fileName) , "%.*sXXXXXX" , (int)sizeof(_fileName)-7 , fileHeader ? fileHeader : "" );
+#ifdef _WIN32
+			_mktemp( _fileName );
+			_fp = fopen( _fileName , "w+b" );
+#else // !_WIN32
+			_fp = fdopen( mkstemp( _fileName ) , "w+b" );
+#endif // _WIN32
+			tempFile = true;	// marks the file for removal in the destructor
+		}
+		if( !_fp ) ERROR_OUT( "Failed to open file: %s" , _fileName );
+		if( !( _buffer = (char*) malloc( _bufferSize ) ) ) ERROR_OUT( "Failed to allocate buffer" );
+	}
+	~BufferedReadWriteFile( void )	// releases the buffer, closes the stream and deletes the file if it was a temporary
+	{
+		free( _buffer );
+		fclose( _fp );
+		if( tempFile ) remove( _fileName );	// only files whose name was generated by the constructor are removed
+	}
+	// Appends size bytes from data, flushing the buffer to the file each time it fills. Returns false if any flush came up short.
+	bool write( const void* data , size_t size )
+	{
+		if( !size ) return true;
+		bool ok = true;
+		const char* _data = (const char*) data;
+		size_t sz = _bufferSize - _bufferIndex;	// free space left in the buffer
+		while( sz<=size )
+		{
+			memcpy( _buffer+_bufferIndex , _data , sz );
+			if( fwrite( _buffer , 1 , _bufferSize , _fp )!=_bufferSize ) ok = false;	// keep consuming the input, but report the failure
+			_data += sz , size -= sz;
+			_bufferIndex = 0 , sz = _bufferSize;
+		}
+		if( size )	// the tail fits strictly inside the buffer: stash it until the next flush or reset()
+		{
+			memcpy( _buffer+_bufferIndex , _data , size );
+			_bufferIndex += size;
+		}
+		return ok;
+	}
+	bool read( void* data , size_t size )	// copies the next size bytes into data, refilling the buffer from the file as needed; false once no data is left
+	{
+		if( !size ) return true;
+		char *_data = (char*) data;
+		size_t sz = _bufferSize - _bufferIndex;	// bytes still unread in the buffer
+		while( sz<=size )	// the request consumes everything that is buffered
+		{
+			if( size && !_bufferSize ) return false;	// the previous refill read nothing
+			memcpy( _data , _buffer+_bufferIndex , sz );
+			_bufferSize = fread( _buffer , 1 , _bufferSize , _fp );	// refill; _bufferSize becomes the number of bytes actually read
+			_data += sz;
+			size -= sz;
+			_bufferIndex = 0;
+			if( !size ) return true;
+			sz = _bufferSize;
+		}
+		if( size )	// the remainder is strictly smaller than what is buffered
+		{
+			if( !_bufferSize ) return false;
+			memcpy( _data , _buffer+_bufferIndex , size );
+			_bufferIndex += size;
+		}
+		return true;
+	}
+	void reset( void )	// flushes the buffered bytes, rewinds the file and pre-loads the buffer for reading
+	{
+		if( _bufferIndex ) fwrite( _buffer , 1 , _bufferIndex , _fp );	// NOTE(review): runs whenever _bufferIndex!=0, including after a partial read() -- confirm callers only reset after writing or a full read
+		_bufferIndex = 0;
+		fseek( _fp , 0 , SEEK_SET );
+		_bufferIndex = 0;	// redundant: already cleared above
+		_bufferSize = fread( _buffer , 1 , _bufferSize , _fp );	// NOTE(review): overwrites the buffer size with the byte count read, so a file shorter than the buffer shrinks it for later use
+	}
 };
+template< class Vertex >
+class CoredFileCurveData : public CoredCurveData< Vertex >	// BufferedReadWriteFile-backed implementation of the CoredCurveData interface
+{
+	BufferedReadWriteFile *oocPointFile;
+	int oocPoints;
+	// NOTE(review): presumably one edge file per thread, as polygonFiles is in CoredFileMeshData -- confirm against the .inl
+	std::vector< BufferedReadWriteFile* > edgeFiles;
+	int threadIndex;
+public:
+	CoredFileCurveData( const char* fileHeader="" );
+	~CoredFileCurveData( void );
+
+	void resetIterator( void );
+
+	int addOutOfCorePoint( const Vertex& p );
+	int addOutOfCorePoint_s( const Vertex& p );
+	void addEdge_s( CoredVertexIndex v1 , CoredVertexIndex v2 );
+	void addEdge_s( int v1 , int v2 );
+
+	int nextOutOfCorePoint( Vertex& p );
+	int nextEdge( CoredVertexIndex& v1 , CoredVertexIndex& v2 );
+
+	int outOfCorePointCount( void );
+	int edgeCount( void );
+};
+
 template< class Vertex >
 class CoredFileMeshData : public CoredMeshData< Vertex >
 {
-	char pointFileName[1024] , polygonFileName[1024];
-	BufferedReadWriteFile *oocPointFile , *polygonFile;
-	int oocPoints , polygons;
+	BufferedReadWriteFile *oocPointFile;
+	int oocPoints;
+	std::vector< int > polygons;
+	std::vector< BufferedReadWriteFile* > polygonFiles;
+	int threadIndex;
 public:
 	CoredFileMeshData( const char* fileHeader="" );
 	~CoredFileMeshData( void );
@@ -400,8 +633,8 @@ class CoredFileMeshData : public CoredMeshData< Vertex >
 
 	int addOutOfCorePoint( const Vertex& p );
 	int addOutOfCorePoint_s( const Vertex& p );
-	int addPolygon_s( const std::vector< CoredVertexIndex >& vertices );
-	int addPolygon_s( const std::vector< int >& vertices );
+	void addPolygon_s( const std::vector< CoredVertexIndex >& vertices );
+	void addPolygon_s( const std::vector< int >& vertices );
 
 	int nextOutOfCorePoint( Vertex& p );
 	int nextPolygon( std::vector< CoredVertexIndex >& vertices );
diff --git a/Src/Geometry.inl b/Src/Geometry.inl
index ad2be93..5f26907 100644
--- a/Src/Geometry.inl
+++ b/Src/Geometry.inl
@@ -27,366 +27,133 @@ DAMAGE.
 */
 
 #include <stdio.h>
+#include "MyMiscellany.h"
 
-template<class Real>
-Real Random(void){return Real(rand())/RAND_MAX;}
+template< class Real > Real Random( void ){ return Real( rand() )/RAND_MAX; }	// rand() rescaled to the closed interval [0,1]
 
-template<class Real>
-Point3D<Real> RandomBallPoint(void){
-	Point3D<Real> p;
-	while(1){
-		p.coords[0]=Real(1.0-2.0*Random<Real>());
-		p.coords[1]=Real(1.0-2.0*Random<Real>());
-		p.coords[2]=Real(1.0-2.0*Random<Real>());
+template< class Real , int Dim >
+Point< Real , Dim > RandomBallPoint( void )
+{
+	Point< Real , Dim > p;
+	while(1)
+	{
+		for( int d=0 ; d<Dim ; d++ ) p[d] = Real( 1.0-2.0*Random< Real >() );
 		double l=SquareLength(p);
-		if(l<=1){return p;}
-	}
-}
-template<class Real>
-Point3D<Real> RandomSpherePoint(void){
-	Point3D<Real> p=RandomBallPoint<Real>();
-	Real l=Real(Length(p));
-	p.coords[0]/=l;
-	p.coords[1]/=l;
-	p.coords[2]/=l;
-	return p;
-}
-
-template<class Real>
-double SquareLength(const Point3D<Real>& p){return p.coords[0]*p.coords[0]+p.coords[1]*p.coords[1]+p.coords[2]*p.coords[2];}
-
-template<class Real>
-double Length(const Point3D<Real>& p){return sqrt(SquareLength(p));}
-
-template<class Real>
-double SquareDistance(const Point3D<Real>& p1,const Point3D<Real>& p2){
-	return (p1.coords[0]-p2.coords[0])*(p1.coords[0]-p2.coords[0])+(p1.coords[1]-p2.coords[1])*(p1.coords[1]-p2.coords[1])+(p1.coords[2]-p2.coords[2])*(p1.coords[2]-p2.coords[2]);
-}
-
-template<class Real>
-double Distance(const Point3D<Real>& p1,const Point3D<Real>& p2){return sqrt(SquareDistance(p1,p2));}
-
-template <class Real>
-void CrossProduct(const Point3D<Real>& p1,const Point3D<Real>& p2,Point3D<Real>& p){
-	p.coords[0]= p1.coords[1]*p2.coords[2]-p1.coords[2]*p2.coords[1];
-	p.coords[1]=-p1.coords[0]*p2.coords[2]+p1.coords[2]*p2.coords[0];
-	p.coords[2]= p1.coords[0]*p2.coords[1]-p1.coords[1]*p2.coords[0];
-}
-template<class Real>
-void EdgeCollapse(const Real& edgeRatio,std::vector<TriangleIndex>& triangles,std::vector< Point3D<Real> >& positions,std::vector< Point3D<Real> >* normals){
-	int i,j,*remapTable,*pointCount,idx[3];
-	Point3D<Real> p[3],q[2],c;
-	double d[3],a;
-	double Ratio=12.0/sqrt(3.0);	// (Sum of Squares Length / Area) for and equilateral triangle
-
-	remapTable=new int[positions.size()];
-	pointCount=new int[positions.size()];
-	for(i=0;i<int(positions.size());i++){
-		remapTable[i]=i;
-		pointCount[i]=1;
-	}
-	for(i=int(triangles.size()-1);i>=0;i--){
-		for(j=0;j<3;j++){
-			idx[j]=triangles[i].idx[j];
-			while(remapTable[idx[j]]<idx[j]){idx[j]=remapTable[idx[j]];}
-		}
-		if(idx[0]==idx[1] || idx[0]==idx[2] || idx[1]==idx[2]){
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-			continue;
-		}
-		for(j=0;j<3;j++){
-			p[j].coords[0]=positions[idx[j]].coords[0]/pointCount[idx[j]];
-			p[j].coords[1]=positions[idx[j]].coords[1]/pointCount[idx[j]];
-			p[j].coords[2]=positions[idx[j]].coords[2]/pointCount[idx[j]];
-		}
-		for(j=0;j<3;j++){
-			q[0].coords[j]=p[1].coords[j]-p[0].coords[j];
-			q[1].coords[j]=p[2].coords[j]-p[0].coords[j];
-			d[j]=SquareDistance(p[j],p[(j+1)%3]);
-		}
-		CrossProduct(q[0],q[1],c);
-		a=Length(c)/2;
-
-		if((d[0]+d[1]+d[2])*edgeRatio > a*Ratio){
-			// Find the smallest edge
-			j=0;
-			if(d[1]<d[j]){j=1;}
-			if(d[2]<d[j]){j=2;}
-
-			int idx1,idx2;
-			if(idx[j]<idx[(j+1)%3]){
-				idx1=idx[j];
-				idx2=idx[(j+1)%3];
-			}
-			else{
-				idx2=idx[j];
-				idx1=idx[(j+1)%3];
-			}
-			positions[idx1].coords[0]+=positions[idx2].coords[0];
-			positions[idx1].coords[1]+=positions[idx2].coords[1];
-			positions[idx1].coords[2]+=positions[idx2].coords[2];
-			if(normals){
-				(*normals)[idx1].coords[0]+=(*normals)[idx2].coords[0];
-				(*normals)[idx1].coords[1]+=(*normals)[idx2].coords[1];
-				(*normals)[idx1].coords[2]+=(*normals)[idx2].coords[2];
-			}
-			pointCount[idx1]+=pointCount[idx2];
-			remapTable[idx2]=idx1;
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-		}
-	}
-	int pCount=0;
-	for(i=0;i<int(positions.size());i++){
-		for(j=0;j<3;j++){positions[i].coords[j]/=pointCount[i];}
-		if(normals){
-			Real l=Real(Length((*normals)[i]));
-			for(j=0;j<3;j++){(*normals)[i].coords[j]/=l;}
-		}
-		if(remapTable[i]==i){ // If vertex i is being used
-			positions[pCount]=positions[i];
-			if(normals){(*normals)[pCount]=(*normals)[i];}
-			pointCount[i]=pCount;
-			pCount++;
-		}
+		if( SquareLength( p )<=1 ) return p;
 	}
-	positions.resize(pCount);
-	for(i=int(triangles.size()-1);i>=0;i--){
-		for(j=0;j<3;j++){
-			idx[j]=triangles[i].idx[j];
-			while(remapTable[idx[j]]<idx[j]){idx[j]=remapTable[idx[j]];}
-			triangles[i].idx[j]=pointCount[idx[j]];
-		}
-		if(idx[0]==idx[1] || idx[0]==idx[2] || idx[1]==idx[2]){
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-		}
-	}
-
-	delete[] pointCount;
-	delete[] remapTable;
 }
-template<class Real>
-void TriangleCollapse(const Real& edgeRatio,std::vector<TriangleIndex>& triangles,std::vector< Point3D<Real> >& positions,std::vector< Point3D<Real> >* normals){
-	int i,j,*remapTable,*pointCount,idx[3];
-	Point3D<Real> p[3],q[2],c;
-	double d[3],a;
-	double Ratio=12.0/sqrt(3.0);	// (Sum of Squares Length / Area) for and equilateral triangle
-
-	remapTable=new int[positions.size()];
-	pointCount=new int[positions.size()];
-	for(i=0;i<int(positions.size());i++){
-		remapTable[i]=i;
-		pointCount[i]=1;
-	}
-	for(i=int(triangles.size()-1);i>=0;i--){
-		for(j=0;j<3;j++){
-			idx[j]=triangles[i].idx[j];
-			while(remapTable[idx[j]]<idx[j]){idx[j]=remapTable[idx[j]];}
-		}
-		if(idx[0]==idx[1] || idx[0]==idx[2] || idx[1]==idx[2]){
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-			continue;
-		}
-		for(j=0;j<3;j++){
-			p[j].coords[0]=positions[idx[j]].coords[0]/pointCount[idx[j]];
-			p[j].coords[1]=positions[idx[j]].coords[1]/pointCount[idx[j]];
-			p[j].coords[2]=positions[idx[j]].coords[2]/pointCount[idx[j]];
-		}
-		for(j=0;j<3;j++){
-			q[0].coords[j]=p[1].coords[j]-p[0].coords[j];
-			q[1].coords[j]=p[2].coords[j]-p[0].coords[j];
-			d[j]=SquareDistance(p[j],p[(j+1)%3]);
-		}
-		CrossProduct(q[0],q[1],c);
-		a=Length(c)/2;
-
-		if((d[0]+d[1]+d[2])*edgeRatio > a*Ratio){
-			// Find the smallest edge
-			j=0;
-			if(d[1]<d[j]){j=1;}
-			if(d[2]<d[j]){j=2;}
-
-			int idx1,idx2,idx3;
-			if(idx[0]<idx[1]){
-				if(idx[0]<idx[2]){
-					idx1=idx[0];
-					idx2=idx[2];
-					idx3=idx[1];
-				}
-				else{
-					idx1=idx[2];
-					idx2=idx[0];
-					idx3=idx[1];
-				}
-			}
-			else{
-				if(idx[1]<idx[2]){
-					idx1=idx[1];
-					idx2=idx[2];
-					idx3=idx[0];
-				}
-				else{
-					idx1=idx[2];
-					idx2=idx[1];
-					idx3=idx[0];
-				}
-			}
-			positions[idx1].coords[0]+=positions[idx2].coords[0]+positions[idx3].coords[0];
-			positions[idx1].coords[1]+=positions[idx2].coords[1]+positions[idx3].coords[1];
-			positions[idx1].coords[2]+=positions[idx2].coords[2]+positions[idx3].coords[2];
-			if(normals){
-				(*normals)[idx1].coords[0]+=(*normals)[idx2].coords[0]+(*normals)[idx3].coords[0];
-				(*normals)[idx1].coords[1]+=(*normals)[idx2].coords[1]+(*normals)[idx3].coords[1];
-				(*normals)[idx1].coords[2]+=(*normals)[idx2].coords[2]+(*normals)[idx3].coords[2];
-			}
-			pointCount[idx1]+=pointCount[idx2]+pointCount[idx3];
-			remapTable[idx2]=idx1;
-			remapTable[idx3]=idx1;
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-		}
-	}
-	int pCount=0;
-	for(i=0;i<int(positions.size());i++){
-		for(j=0;j<3;j++){positions[i].coords[j]/=pointCount[i];}
-		if(normals){
-			Real l=Real(Length((*normals)[i]));
-			for(j=0;j<3;j++){(*normals)[i].coords[j]/=l;}
-		}
-		if(remapTable[i]==i){ // If vertex i is being used
-			positions[pCount]=positions[i];
-			if(normals){(*normals)[pCount]=(*normals)[i];}
-			pointCount[i]=pCount;
-			pCount++;
-		}
-	}
-	positions.resize(pCount);
-	for(i=int(triangles.size()-1);i>=0;i--){
-		for(j=0;j<3;j++){
-			idx[j]=triangles[i].idx[j];
-			while(remapTable[idx[j]]<idx[j]){idx[j]=remapTable[idx[j]];}
-			triangles[i].idx[j]=pointCount[idx[j]];
-		}
-		if(idx[0]==idx[1] || idx[0]==idx[2] || idx[1]==idx[2]){
-			triangles[i]=triangles[triangles.size()-1];
-			triangles.pop_back();
-		}
-	}
-	delete[] pointCount;
-	delete[] remapTable;
+template< class Real , int Dim >
+Point< Real , Dim > RandomSpherePoint( void )	// a RandomBallPoint() sample rescaled to unit length
+{
+	Point< Real , Dim > p = RandomBallPoint< Real , Dim >();
+	return p / (Real)Length( p );	// NOTE(review): no guard against a zero-length sample
 }
 
 ///////////////////
 // Triangulation //
 ///////////////////
-template<class Real>
-long long Triangulation<Real>::EdgeIndex( int p1 , int p2 )
+template< class Real , unsigned int Dim >
+long long Triangulation< Real , Dim >::EdgeIndex( int p1 , int p2 )
 {
-	if(p1>p2)	{return ((long long)(p1)<<32) | ((long long)(p2));}
-	else		{return ((long long)(p2)<<32) | ((long long)(p1));}
+	if( p1>p2 ) return ((long long)(p1)<<32) | ((long long)(p2));
+	else        return ((long long)(p2)<<32) | ((long long)(p1));
 }
 
-template<class Real>
-int Triangulation<Real>::factor(int tIndex,int& p1,int& p2,int & p3){
-	if(triangles[tIndex].eIndex[0]<0 || triangles[tIndex].eIndex[1]<0 || triangles[tIndex].eIndex[2]<0){return 0;}
-	if(edges[triangles[tIndex].eIndex[0]].tIndex[0]==tIndex){p1=edges[triangles[tIndex].eIndex[0]].pIndex[0];}
-	else													{p1=edges[triangles[tIndex].eIndex[0]].pIndex[1];}
-	if(edges[triangles[tIndex].eIndex[1]].tIndex[0]==tIndex){p2=edges[triangles[tIndex].eIndex[1]].pIndex[0];}
-	else													{p2=edges[triangles[tIndex].eIndex[1]].pIndex[1];}
-	if(edges[triangles[tIndex].eIndex[2]].tIndex[0]==tIndex){p3=edges[triangles[tIndex].eIndex[2]].pIndex[0];}
-	else													{p3=edges[triangles[tIndex].eIndex[2]].pIndex[1];}
+template< class Real , unsigned int Dim >
+int Triangulation< Real , Dim >::factor( int tIndex , int& p1 , int& p2 , int & p3 ) const
+{
+	if( triangles[tIndex].eIndex[0]<0 || triangles[tIndex].eIndex[1]<0 || triangles[tIndex].eIndex[2]<0 ) return 0;
+	if( edges[triangles[tIndex].eIndex[0]].tIndex[0]==tIndex ) p1=edges[triangles[tIndex].eIndex[0]].pIndex[0];
+	else                                                       p1=edges[triangles[tIndex].eIndex[0]].pIndex[1];
+	if( edges[triangles[tIndex].eIndex[1]].tIndex[0]==tIndex ) p2=edges[triangles[tIndex].eIndex[1]].pIndex[0];
+	else                                                       p2=edges[triangles[tIndex].eIndex[1]].pIndex[1];
+	if( edges[triangles[tIndex].eIndex[2]].tIndex[0]==tIndex ) p3=edges[triangles[tIndex].eIndex[2]].pIndex[0];
+	else                                                       p3=edges[triangles[tIndex].eIndex[2]].pIndex[1];
 	return 1;
 }
-template<class Real>
-double Triangulation<Real>::area(int p1,int p2,int p3){
-	Point3D<Real> q1,q2,q;
-	for(int i=0;i<3;i++){
-		q1.coords[i]=points[p2].coords[i]-points[p1].coords[i];
-		q2.coords[i]=points[p3].coords[i]-points[p1].coords[i];
-	}
-	CrossProduct(q1,q2,q);
-	return Length(q);
-}
-template<class Real>
-double Triangulation<Real>::area(int tIndex){
-	int p1,p2,p3;
-	factor(tIndex,p1,p2,p3);
+template< class Real , unsigned int Dim > Real Triangulation< Real , Dim >::area( int p1 , int p2 , int p3 ) const { return Area( points[p1] , points[p2] , points[p3] ); }
+template< class Real , unsigned int Dim >
+Real Triangulation< Real , Dim >::area( int tIndex ) const
+{
+	int p1 , p2 , p3;
+	factor( tIndex , p1 , p2 , p3 );
 	return area(p1,p2,p3);
 }
-template<class Real>
-double Triangulation<Real>::area(void){
-	double a=0;
-	for(int i=0;i<int(triangles.size());i++){a+=area(i);}
+template< class Real , unsigned int Dim >
+Real Triangulation< Real , Dim >::area( void ) const
+{
+	Real a=0;
+	for( int i=0 ; i<(int)triangles.size() ; i++ ) a += area(i);
 	return a;
 }
-template<class Real>
-int Triangulation<Real>::addTriangle(int p1,int p2,int p3)
+template< class Real , unsigned int Dim >
+int Triangulation< Real , Dim >::addTriangle( int p1 , int p2 , int p3 )
 {
 	std::unordered_map<long long, int>::iterator iter;
-	int tIdx,eIdx,p[3];
-	p[0]=p1;
-	p[1]=p2;
-	p[2]=p3;
-	triangles.push_back(TriangulationTriangle());
-	tIdx=int(triangles.size())-1;
+	int tIdx , eIdx , p[] = { p1 , p2 , p3 };
+	triangles.push_back( TriangulationTriangle() );
+	tIdx = (int)triangles.size()-1;
 
-	for(int i=0;i<3;i++)
+	for( int i=0 ; i<3 ; i++ )
 	{
-		long long e = EdgeIndex(p[i],p[(i+1)%3]);
-		iter=edgeMap.find(e);
-		if(iter==edgeMap.end())
+		long long e = EdgeIndex( p[i] , p[(i+1)%3] );
+		iter = edgeMap.find(e);
+		if( iter==edgeMap.end() )
 		{
 			TriangulationEdge edge;
-			edge.pIndex[0]=p[i];
-			edge.pIndex[1]=p[(i+1)%3];
+			edge.pIndex[0] = p[i];
+			edge.pIndex[1] = p[(i+1)%3];
 			edges.push_back(edge);
-			eIdx=int(edges.size())-1;
-			edgeMap[e]=eIdx;
+			eIdx = (int)edges.size()-1;
+			edgeMap[e] = eIdx;
 			edges[eIdx].tIndex[0]=tIdx;
 		}
-		else{
-			eIdx=edgeMap[e];
-			if(edges[eIdx].pIndex[0]==p[i]){
-				if(edges[eIdx].tIndex[0]<0){edges[eIdx].tIndex[0]=tIdx;}
-				else{printf("Edge Triangle in use 1\n");return 0;}
+		else
+		{
+			eIdx = edgeMap[e];
+			if( edges[eIdx].pIndex[0]==p[i] )
+			{
+				if( edges[eIdx].tIndex[0]<0 ) edges[eIdx].tIndex[0] = tIdx;
+				else{ printf( "Edge Triangle in use 1\n" ) ; return 0; }
 			}
-			else{
-				if(edges[eIdx].tIndex[1]<0){edges[eIdx].tIndex[1]=tIdx;}
-				else{printf("Edge Triangle in use 2\n");return 0;}
+			else
+			{
+				if( edges[eIdx].tIndex[1]<0 ) edges[eIdx].tIndex[1] = tIdx;
+				else{ printf( "Edge Triangle in use 2\n") ; return 0; }
 			}
 
 		}
-		triangles[tIdx].eIndex[i]=eIdx;
+		triangles[tIdx].eIndex[i] = eIdx;
 	}
 	return tIdx;
 }
-template<class Real>
-int Triangulation<Real>::flipMinimize(int eIndex){
-	double oldArea,newArea;
-	int oldP[3],oldQ[3],newP[3],newQ[3];
+template< class Real , unsigned int Dim >
+int Triangulation< Real , Dim >::flipMinimize( int eIndex )
+{
+	Real oldArea,newArea;
+	int oldP[3] , oldQ[3] , newP[3] , newQ[3];
 	TriangulationEdge newEdge;
 
-	if(edges[eIndex].tIndex[0]<0 || edges[eIndex].tIndex[1]<0){return 0;}
+	if( edges[eIndex].tIndex[0]<0 || edges[eIndex].tIndex[1]<0 ) return 0;
 
-	if(!factor(edges[eIndex].tIndex[0],oldP[0],oldP[1],oldP[2])){return 0;}
-	if(!factor(edges[eIndex].tIndex[1],oldQ[0],oldQ[1],oldQ[2])){return 0;}
+	if( !factor( edges[eIndex].tIndex[0] , oldP[0] , oldP[1] , oldP[2] ) ) return 0;
+	if( !factor( edges[eIndex].tIndex[1] , oldQ[0] , oldQ[1] , oldQ[2] ) ) return 0;
 
-	oldArea=area(oldP[0],oldP[1],oldP[2])+area(oldQ[0],oldQ[1],oldQ[2]);
-	int idxP,idxQ;
-	for(idxP=0;idxP<3;idxP++){
+	oldArea = area( oldP[0] , oldP[1] , oldP[2] ) + area( oldQ[0] , oldQ[1] , oldQ[2] );
+	int idxP , idxQ;
+	for( idxP=0 ; idxP<3 ; idxP++ )
+	{
 		int i;
-		for(i=0;i<3;i++){if(oldP[idxP]==oldQ[i]){break;}}
-		if(i==3){break;}
+		for( i=0 ; i<3 ; i++ ) if( oldP[idxP]==oldQ[i] ) break;
+		if(i==3) break;
 	}
-	for(idxQ=0;idxQ<3;idxQ++){
+	for( idxQ=0 ; idxQ<3 ; idxQ++ )
+	{
 		int i;
-		for(i=0;i<3;i++){if(oldP[i]==oldQ[idxQ]){break;}}
-		if(i==3){break;}
+		for( i=0 ; i<3 ; i++ ) if( oldP[i]==oldQ[idxQ] ) break;
+		if( i==3 ) break;
 	}
-	if(idxP==3 || idxQ==3){return 0;}
+	if(idxP==3 || idxQ==3) return 0;
 	newP[0]=oldP[idxP];
 	newP[1]=oldP[(idxP+1)%3];
 	newP[2]=oldQ[idxQ];
@@ -394,31 +161,34 @@ int Triangulation<Real>::flipMinimize(int eIndex){
 	newQ[1]=oldP[(idxP+2)%3];
 	newQ[2]=oldP[idxP];
 
-	newArea=area(newP[0],newP[1],newP[2])+area(newQ[0],newQ[1],newQ[2]);
-	if(oldArea<=newArea){return 0;}
+	newArea = area( newP[0] , newP[1] , newP[2] ) + area( newQ[0] , newQ[1] , newQ[2] );
+	if( oldArea<=newArea ) return 0;
 
-	// Remove the entry in the hash_table for the old edge
-	edgeMap.erase(EdgeIndex(edges[eIndex].pIndex[0],edges[eIndex].pIndex[1]));
+	// Remove the entry in the hash-table for the old edge
+	edgeMap.erase( EdgeIndex( edges[eIndex].pIndex[0] , edges[eIndex].pIndex[1] ) );
 	// Set the new edge so that the zero-side is newQ
-	edges[eIndex].pIndex[0]=newP[0];
-	edges[eIndex].pIndex[1]=newQ[0];
-	// Insert the entry into the hash_table for the new edge
-	edgeMap[EdgeIndex(newP[0],newQ[0])]=eIndex;
+	edges[eIndex].pIndex[0] = newP[0];
+	edges[eIndex].pIndex[1] = newQ[0];
+	// Insert the entry into the hash-table for the new edge
+	edgeMap[EdgeIndex(newP[0],newQ[0])] = eIndex;
 	// Update the triangle information
-	for(int i=0;i<3;i++){
+	for( int i=0 ; i<3 ; i++ )
+	{
 		int idx;
-		idx=edgeMap[EdgeIndex(newQ[i],newQ[(i+1)%3])];
-		triangles[edges[eIndex].tIndex[0]].eIndex[i]=idx;
-		if(idx!=eIndex){
-			if(edges[idx].tIndex[0]==edges[eIndex].tIndex[1]){edges[idx].tIndex[0]=edges[eIndex].tIndex[0];}
-			if(edges[idx].tIndex[1]==edges[eIndex].tIndex[1]){edges[idx].tIndex[1]=edges[eIndex].tIndex[0];}
+		idx = edgeMap[EdgeIndex(newQ[i],newQ[(i+1)%3])];
+		triangles[edges[eIndex].tIndex[0]].eIndex[i] = idx;
+		if(idx!=eIndex)
+		{
+			if( edges[idx].tIndex[0]==edges[eIndex].tIndex[1] ) edges[idx].tIndex[0] = edges[eIndex].tIndex[0];
+			if( edges[idx].tIndex[1]==edges[eIndex].tIndex[1] ) edges[idx].tIndex[1] = edges[eIndex].tIndex[0];
 		}
 
-		idx=edgeMap[EdgeIndex(newP[i],newP[(i+1)%3])];
+		idx = edgeMap[EdgeIndex(newP[i],newP[(i+1)%3])];
 		triangles[edges[eIndex].tIndex[1]].eIndex[i]=idx;
-		if(idx!=eIndex){
-			if(edges[idx].tIndex[0]==edges[eIndex].tIndex[0]){edges[idx].tIndex[0]=edges[eIndex].tIndex[1];}
-			if(edges[idx].tIndex[1]==edges[eIndex].tIndex[0]){edges[idx].tIndex[1]=edges[eIndex].tIndex[1];}
+		if( idx!=eIndex )
+		{
+			if( edges[idx].tIndex[0]==edges[eIndex].tIndex[0] ) edges[idx].tIndex[0]=edges[eIndex].tIndex[1];
+			if( edges[idx].tIndex[1]==edges[eIndex].tIndex[0] ) edges[idx].tIndex[1]=edges[eIndex].tIndex[1];
 		}
 	}
 	return 1;
@@ -427,9 +197,9 @@ int Triangulation<Real>::flipMinimize(int eIndex){
 // CoredVectorMeshData //
 /////////////////////////
 template< class Vertex >
-CoredVectorMeshData< Vertex >::CoredVectorMeshData( void ) { oocPointIndex = polygonIndex = 0; }
+CoredVectorMeshData< Vertex >::CoredVectorMeshData( void ) { oocPointIndex = polygonIndex = threadIndex = 0 ; polygons.resize( omp_get_max_threads() ); }
 template< class Vertex >
-void CoredVectorMeshData< Vertex >::resetIterator ( void ) { oocPointIndex = polygonIndex = 0; }
+void CoredVectorMeshData< Vertex >::resetIterator ( void ) { oocPointIndex = polygonIndex = threadIndex = 0; }
 template< class Vertex >
 int CoredVectorMeshData< Vertex >::addOutOfCorePoint( const Vertex& p )
 {
@@ -448,18 +218,12 @@ int CoredVectorMeshData< Vertex >::addOutOfCorePoint_s( const Vertex& p )
 	return (int)sz;
 }
 template< class Vertex >
-int CoredVectorMeshData< Vertex >::addPolygon_s( const std::vector< int >& polygon )
+void CoredVectorMeshData< Vertex >::addPolygon_s( const std::vector< int >& polygon )
 {
-	size_t sz;
-#pragma omp critical (CoredVectorMeshData_addPolygon_s)
-	{
-		sz = polygon.size();
-		polygons.push_back( polygon );
-	}
-	return (int)sz;
+	polygons[ omp_get_thread_num() ].push_back( polygon );
 }
 template< class Vertex >
-int CoredVectorMeshData< Vertex >::addPolygon_s( const std::vector< CoredVertexIndex >& vertices )
+void CoredVectorMeshData< Vertex >::addPolygon_s( const std::vector< CoredVertexIndex >& vertices )
 {
 	std::vector< int > polygon( vertices.size() );
 	for( int i=0 ; i<(int)vertices.size() ; i++ ) 
@@ -475,26 +239,38 @@ int CoredVectorMeshData< Vertex >::nextOutOfCorePoint( Vertex& p )
 		p=oocPoints[oocPointIndex++];
 		return 1;
 	}
-	else{return 0;}
+	else return 0;
 }
 template< class Vertex >
 int CoredVectorMeshData< Vertex >::nextPolygon( std::vector< CoredVertexIndex >& vertices )
 {
-	if( polygonIndex<int( polygons.size() ) )
+	while( true )
 	{
-		std::vector< int >& polygon = polygons[ polygonIndex++ ];
-		vertices.resize( polygon.size() );
-		for( int i=0 ; i<int(polygon.size()) ; i++ )
-			if( polygon[i]<0 ) vertices[i].idx = -polygon[i]-1 , vertices[i].inCore = false;
-			else               vertices[i].idx =  polygon[i]   , vertices[i].inCore = true;
-		return 1;
+		if( threadIndex<(int)polygons.size() )
+		{
+			if( polygonIndex<int( polygons[threadIndex].size() ) )
+			{
+				std::vector< int >& polygon = polygons[threadIndex][ polygonIndex++ ];
+				vertices.resize( polygon.size() );
+				for( int i=0 ; i<int(polygon.size()) ; i++ )
+					if( polygon[i]<0 ) vertices[i].idx = -polygon[i]-1 , vertices[i].inCore = false;
+					else               vertices[i].idx =  polygon[i]   , vertices[i].inCore = true;
+					return 1;
+			}
+			else threadIndex++ , polygonIndex = 0;
+		}
+		else return 0;
 	}
-	else return 0;
 }
 template< class Vertex >
 int CoredVectorMeshData< Vertex >::outOfCorePointCount(void){return int(oocPoints.size());}
 template< class Vertex >
-int CoredVectorMeshData< Vertex >::polygonCount( void ) { return int( polygons.size() ); }
+int CoredVectorMeshData< Vertex >::polygonCount( void )
+{
+	int total = 0;	// polygons holds one list per thread; the count is the sum of their sizes
+	for( size_t t=0 ; t<polygons.size() ; t++ ) total += (int)polygons[t].size();
+	return total;
+}
 
 ///////////////////////
 // CoredFileMeshData //
@@ -502,22 +278,27 @@ int CoredVectorMeshData< Vertex >::polygonCount( void ) { return int( polygons.s
 template< class Vertex >
 CoredFileMeshData< Vertex >::CoredFileMeshData( const char* fileHeader )
 {
-	oocPoints = polygons = 0;
-	
+	threadIndex = 0;
+	oocPoints = 0;
+	polygons.resize( omp_get_max_threads() );
+	for( int i=0 ; i<polygons.size() ; i++ ) polygons[i] = 0;
+
 	oocPointFile = new BufferedReadWriteFile( NULL , fileHeader );
-	polygonFile = new BufferedReadWriteFile( NULL , fileHeader );
+	polygonFiles.resize( omp_get_max_threads() );
+	for( int i=0 ; i<polygonFiles.size() ; i++ ) polygonFiles[i] = new BufferedReadWriteFile( NULL , fileHeader );
 }
 template< class Vertex >
 CoredFileMeshData< Vertex >::~CoredFileMeshData( void )
 {
 	delete oocPointFile;
-	delete polygonFile;
+	for( int i=0 ; i<polygonFiles.size() ; i++ ) delete polygonFiles[i];
 }
 template< class Vertex >
 void CoredFileMeshData< Vertex >::resetIterator ( void )
 {
 	oocPointFile->reset();
-	polygonFile->reset();
+	threadIndex = 0;
+	for( int i=0 ; i<polygonFiles.size() ; i++ ) polygonFiles[i]->reset();
 }
 template< class Vertex >
 int CoredFileMeshData< Vertex >::addOutOfCorePoint( const Vertex& p )
@@ -539,20 +320,16 @@ int CoredFileMeshData< Vertex >::addOutOfCorePoint_s( const Vertex& p )
 	return sz;
 }
 template< class Vertex >
-int CoredFileMeshData< Vertex >::addPolygon_s( const std::vector< int >& vertices )
+void CoredFileMeshData< Vertex >::addPolygon_s( const std::vector< int >& vertices )
 {
-	int sz , vSize = (int)vertices.size();
-#pragma omp critical (CoredFileMeshData_addPolygon_s )
-	{
-		sz = polygons;
-		polygonFile->write( &vSize , sizeof(int) );
-		polygonFile->write( &vertices[0] , sizeof(int) * vSize );
-		polygons++;
-	}
-	return sz;
+	int vSize = (int)vertices.size();
+	int thread = omp_get_thread_num();
+	polygonFiles[thread]->write( &vSize , sizeof(int) );
+	polygonFiles[thread]->write( &vertices[0] , sizeof(int) * vSize );
+	polygons[thread]++;
 }
 template< class Vertex >
-int CoredFileMeshData< Vertex >::addPolygon_s( const std::vector< CoredVertexIndex >& vertices )
+void CoredFileMeshData< Vertex >::addPolygon_s( const std::vector< CoredVertexIndex >& vertices )
 {
 	std::vector< int > polygon( vertices.size() );
 	for( int i=0 ; i<(int)vertices.size() ; i++ ) 
@@ -569,23 +346,91 @@ int CoredFileMeshData< Vertex >::nextOutOfCorePoint( Vertex& p )
 template< class Vertex >
 int CoredFileMeshData< Vertex >::nextPolygon( std::vector< CoredVertexIndex >& vertices )
 {
-	int pSize;
-	if( polygonFile->read( &pSize , sizeof(int) ) )
+	while( true )
 	{
-		std::vector< int > polygon( pSize );
-		if( polygonFile->read( &polygon[0] , sizeof(int)*pSize ) )
+		if( threadIndex<(int)polygonFiles.size() )
 		{
-			vertices.resize( pSize );
-			for( int i=0 ; i<int(polygon.size()) ; i++ )
-				if( polygon[i]<0 ) vertices[i].idx = -polygon[i]-1 , vertices[i].inCore = false;
-				else               vertices[i].idx =  polygon[i]   , vertices[i].inCore = true;
-			return 1;
+			int pSize;
+			if( polygonFiles[threadIndex]->read( &pSize , sizeof(int) ) )
+			{
+				std::vector< int > polygon( pSize );
+				if( polygonFiles[threadIndex]->read( &polygon[0] , sizeof(int)*pSize ) )
+				{
+					vertices.resize( pSize );
+					for( int i=0 ; i<int(polygon.size()) ; i++ )
+						if( polygon[i]<0 ) vertices[i].idx = -polygon[i]-1 , vertices[i].inCore = false;
+						else               vertices[i].idx =  polygon[i]   , vertices[i].inCore = true;
+						return 1;
+				}
+				ERROR_OUT( "Failed to read polygon from file" );
+			}
+			else threadIndex++;
 		}
-		return 0;
+		else return 0;
 	}
-	else return 0;
 }
 template< class Vertex >
 int CoredFileMeshData< Vertex >::outOfCorePointCount( void ){ return oocPoints; }
 template< class Vertex >
-int CoredFileMeshData< Vertex >::polygonCount( void ) { return polygons; }
+int CoredFileMeshData< Vertex >::polygonCount( void )
+{
+	int total = 0;	// polygons[t] is the number of polygons added by thread t
+	for( size_t t=0 ; t<polygons.size() ; t++ ) total += polygons[t];
+	return total;
+}
+
+/////////////
+// Simplex //
+/////////////
+template< class Real , unsigned int Dim , unsigned int K >
+void Simplex< Real , Dim , K >::split( Point< Real , Dim > pNormal , Real pOffset , std::vector< Simplex >& back , std::vector< Simplex >& front ) const
+{
+	Real values[K+1];	// value of < p , pNormal > - pOffset at each of the K+1 vertices
+	bool frontSet = false , backSet = false;
+
+	// Evaluate the hyper-plane's function at the vertices and mark if strictly front/back vertices have been found
+	for( int k=0 ; k<=K ; k++ )
+	{
+		values[k] = Point< Real , Dim >::Dot( p[k] , pNormal ) - pOffset;
+		backSet |= ( values[k]<0 ) , frontSet |= ( values[k]>0 );
+	}
+
+	// If all the vertices are behind or on, or all the vertices are in front or on, we are done (a simplex lying entirely on the plane goes to back).
+	if( !frontSet ){ back.push_back( *this ) ; return; }
+	if( !backSet ){ front.push_back( *this ) ; return; }
+
+	// Pick some intersection of the hyper-plane with a simplex edge (there is no break, so the last crossed edge in (i,j) order is kept)
+	int v1 , v2;
+	Point< Real , Dim > midPoint;
+	{
+		for( int i=0 ; i<K ; i++ ) for( int j=i+1 ; j<=K ; j++ ) if( values[i]*values[j]<0 )
+		{
+			v1 = i , v2 = j;
+			Real t1 = values[i] / ( values[i] - values[j] ) , t2 = (Real)( 1. - t1 );	// t1 is the fraction of the way from p[i] to p[j] at which the function vanishes
+			midPoint = p[j]*t1 + p[i]*t2;
+		}
+	}
+	// Only the two faces opposite the end-points of the crossed edge are processed: split each with the hyper-plane and connect the sub-simplices to the mid-point
+	for( int i=0 ; i<=K ; i++ )
+	{
+		if( i!=v1 && i!=v2 ) continue;
+		Simplex< Real , Dim , K-1 > f;		// The face
+		Simplex< Real , Dim , K > s;		// The sub-simplex
+		for( int j=0 , idx=0 ; j<=K ; j++ )	if( j!=i ) f[idx++] = p[j];	// the face keeps every vertex except p[i]
+		std::vector< Simplex< Real , Dim , K-1 > > _back , _front;
+		f.split( pNormal , pOffset , _back , _front );	// recursion on the lower-dimensional simplex
+		s[i] = midPoint;	// the mid-point takes the slot of the omitted vertex
+
+		for( int j=0 ; j<_back.size() ; j++ ) 
+		{
+			for( int k=0 ; k<K ; k++ ) s[ k<i ? k : k+1 ] = _back[j][k];	// fill the remaining slots, skipping index i
+			back.push_back( s );
+		}
+
+		for( int j=0 ; j<_front.size() ; j++ ) 
+		{
+			for( int k=0 ; k<K ; k++ ) s[ k<i ? k : k+1 ] = _front[j][k];	// fill the remaining slots, skipping index i
+			front.push_back( s );
+		}
+	}
+}
\ No newline at end of file
diff --git a/Src/Image.h b/Src/Image.h
new file mode 100644
index 0000000..345a3d7
--- /dev/null
+++ b/Src/Image.h
@@ -0,0 +1,441 @@
+#ifndef IMAGE_INCLUDED
+#define IMAGE_INCLUDED
+
+#define SUPPORT_TILES
+
+#include <string.h>
+#include "MyMiscellany.h"
+
+struct ImageReader	// Abstract row-by-row image reader; Get() picks the concrete reader from the file extension
+{
+	virtual unsigned int nextRow( unsigned char* row ) = 0;	// Reads the next image row into the caller-provided buffer
+	static unsigned char* Read( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+	{	// Reads the whole image into a new[]-allocated row-major buffer (the caller owns it and must delete[] it) and reports its dimensions
+		ImageReader* reader = Get( fileName );
+		width = reader->width() , height = reader->height() , channels = reader->channels();
+		unsigned char* pixels = new unsigned char[ (size_t)width*height*channels ];	// size_t arithmetic so that large images do not wrap around unsigned int
+		for( unsigned int j=0 ; j<height ; j++ ) reader->nextRow( pixels + (size_t)j*width*channels );
+		delete reader;
+		return pixels;
+	}
+	static unsigned char* ReadColor( const char* fileName , unsigned int& width , unsigned int& height )
+	{	// Like Read(), but always returns three channels per pixel: one-channel input is replicated into all three channels
+		ImageReader* reader = Get( fileName );
+		const unsigned int channels = reader->channels();	// [FIX] was declared but never assigned before being tested and used
+		width = reader->width() , height = reader->height();
+		if( channels!=1 && channels!=3 ) ERROR_OUT( "Requres one- or three-channel input" );
+		unsigned char* pixels = new unsigned char[ (size_t)width*height*3 ];
+		unsigned char* pixelRow = new unsigned char[ (size_t)width*channels ];	// Scratch buffer holding one row in the file's own channel count
+		for( unsigned int j=0 ; j<height ; j++ )
+		{
+			reader->nextRow( pixelRow );
+			if     ( channels==3 ) memcpy( pixels+(size_t)j*width*3 , pixelRow , sizeof(unsigned char)*width*3 );
+			else if( channels==1 ) for( unsigned int i=0 ; i<width ; i++ ) for( unsigned int c=0 ; c<3 ; c++ ) pixels[(size_t)j*width*3+i*3+c] = pixelRow[i];
+		}
+		delete[] pixelRow;
+		delete reader;
+		return pixels;
+	}
+
+	static bool ValidExtension( const char *ext );	// True iff the extension is one Get() can open
+	static ImageReader* Get( const char* fileName );	// Creates the reader for the file's extension; the caller deletes it
+	static void GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );
+	virtual ~ImageReader( void ){ }
+	unsigned int width( void ) const { return _width; }
+	unsigned int height( void ) const { return _height; }
+	unsigned int channels( void ) const { return _channels; }
+protected:
+	unsigned int _width , _height , _channels;	// Assigned by Get() once the concrete reader has been constructed
+};
+struct ImageWriterParams	// Options handed to ImageWriter::Get / ImageWriter::Write
+{
+	static const char* DefaultTileExtension;
+	unsigned int quality;	// Forwarded to the JPEG and PNG writers by ImageWriter::Get
+#ifdef SUPPORT_TILES
+	const char* tileExtension;	// Extension given to the tile files that TiledImageWriter creates
+	unsigned int tileWidth , tileHeight;	// Tile dimensions used by TiledImageWriter
+	ImageWriterParams* tileParams;	// If non-NULL, copied by TiledImageWriter and used as the parameters for writing the tiles themselves
+	ImageWriterParams( void ) : quality( 100 ) , tileExtension( DefaultTileExtension ) , tileWidth( 4096 ) , tileHeight( 4096 ) , tileParams( NULL ){};
+#else // !SUPPORT_TILES
+	ImageWriterParams( void ) : quality( 100 ){};
+#endif // SUPPORT_TILES
+};
+const char* ImageWriterParams::DefaultTileExtension = "jpg";	// NOTE(review): a definition (not a declaration) in a header -- including this header from two translation units defines it twice
+struct ImageWriter	// Abstract row-by-row image writer; Get() picks the concrete writer from the file extension
+{
+	virtual unsigned int nextRow( const unsigned char* row ) = 0;	// Writes the next image row
+	virtual unsigned int nextRows( const unsigned char* rows , unsigned int rowNum ){ unsigned int row = 0 ; for( unsigned int r=0 ; r<rowNum ; r++ ) row = nextRow( rows + (size_t)_width * _channels * r ) ; return row; }	// Writes rowNum contiguous rows and returns the last nextRow() result ([FIX] 0 instead of an uninitialized value when rowNum is 0)
+	static void Write( const char* fileName , const unsigned char* pixels , unsigned int width , unsigned int height , unsigned int channels , ImageWriterParams params=ImageWriterParams() )
+	{	// Writes a complete row-major image; the writer is deleted once the last row has been handed over
+		ImageWriter* writer = Get( fileName , width , height , channels , params );
+		for( unsigned int j=0 ; j<height ; j++ ) writer->nextRow( pixels + (size_t)j*width*channels );	// size_t arithmetic so that large images do not wrap around unsigned int
+		delete writer;
+	}
+
+	static bool ValidExtension( const char *ext );	// True iff the extension is one Get() can write
+	static ImageWriter* Get( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , ImageWriterParams params=ImageWriterParams() );
+	virtual ~ImageWriter( void ){ }
+	unsigned int width( void ) const { return _width; }
+	unsigned int height( void ) const { return _height; }
+	unsigned int channels( void ) const { return _channels; }
+protected:
+	unsigned int _width , _height , _channels;	// Assigned by Get() once the concrete writer has been constructed
+};
+
+#ifdef SUPPORT_TILES
+struct TiledImageReader : public ImageReader	// Reads an image stored as a grid of tile images that are listed in a text file
+{
+	unsigned int nextRow( unsigned char* row );	// Assembles the next full-width row from the current tile row's readers
+	TiledImageReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );
+	~TiledImageReader( void );
+	static bool GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );	// Returns false if the grid file cannot be opened
+protected:
+	ImageReader** _tileReaders;	// One reader per tile column, for the tile row currently being read
+	char** _tileNames;	// Tile file names, indexed as row * _tileColumns + column
+	unsigned int _tileRows , _tileColumns , _currentPixelRow , _currentTileRow , *_tileWidths , *_tileHeights;	// _tileWidths / _tileHeights hold cumulative offsets: entry 0 is 0, the last entry is the full image size
+};
+struct TiledImageWriter : public ImageWriter	// Writes an image as a grid of tile images plus a text file listing them
+{
+	unsigned int nextRow( const unsigned char* row );	// Hands each tile column's fragment of the row to that column's writer
+	TiledImageWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , ImageWriterParams params );
+	~TiledImageWriter( void );
+protected:
+	ImageWriter** _tileWriters;	// One writer per tile column, for the tile row currently being written
+	char** _tileNames;	// Tile file names, indexed as row * _tileColumns + column
+	unsigned int _tileWidth , _tileHeight , _tileRows , _tileColumns , _currentPixelRow;	// _tileWidth / _tileHeight are the nominal tile size; the last column / row may be smaller
+	ImageWriterParams _params;	// Parameters passed on when the tile writers are created
+};
+#endif // SUPPORT_TILES
+
+
+// [WARNING] Need to include "png.h" before "jpeg.h" so that "setjmp.h" is not already included (?)
+#include "PNG.h"
+#include "JPEG.h"
+
+struct FileNameParser	// Splits file names into pieces; every non-NULL result is new[]-allocated and must be delete[]d by the caller
+{
+#if defined( _WIN32 ) || defined( _WIN64 )
+	static const char Separator = (char)'\\';
+#else // !_WIN
+	static const char Separator = (char)'/';
+#endif // _WIN
+	// Extension: the text after the last '.' (a copy of the whole name if there is no '.')
+	static inline char* Extension  ( const char* fileName ){ return __Split( fileName , '.' , false , false ); }
+	// Header: the text before the last '.' (NULL if there is no '.')
+	static inline char* Header     ( const char* fileName ){ return __Split( fileName , '.' , true , false ); }
+	// Local: the text after the last Separator (a copy of the whole name if there is none)
+	static inline char* Local      ( const char* fileName ){ return __Split( fileName , Separator , false , false ); }
+	// Dir: the text before the last Separator (NULL if there is none)
+	static inline char* Dir        ( const char* fileName ){ return __Split( fileName , Separator , true , false ); }
+	// LocalHeader: the Header of the Local part
+	static inline char* LocalHeader( const char* fileName )
+	{
+		char* local = Local( fileName );
+		if( !local ) ERROR_OUT( "Couldn't get local file name: %s" , fileName );
+		char* header = Header( local );
+		delete[] local;
+		return header;
+	}
+
+protected:
+	// Splits "fileName" at the first (first=true) or last (first=false) "splitChar" and returns the part before it (front=true) or after it (front=false)
+	static inline char* __Split( const char* fileName , char splitChar , bool front , bool first )
+	{
+		const int length = (int)strlen( fileName );
+		int split;	// Index of the split character: "length" after a failed forward scan, -1 after a failed backward scan
+		// Locate the split character
+		if( first ){ split = 0 ; while( split<length && fileName[split]!=splitChar ) split++; }
+		else       { split = length-1 ; while( split>=0 && fileName[split]!=splitChar ) split--; }
+
+		if( front )
+		{
+			// A failed backward scan gives no front part; a failed forward scan gives the whole name
+			if( split==-1 ) return NULL;
+			char* head = new char[ length+1 ];
+			strcpy( head , fileName ) , head[ split ] = 0;
+			return head;
+		}
+		// A failed forward scan gives no back part; a failed backward scan gives the whole name
+		if( split==length ) return NULL;
+		char* tail = new char[ length-split ];
+		strcpy( tail , fileName+split+1 );
+		return tail;
+	}
+};
+
+inline bool ImageReader::ValidExtension( const char *ext )
+{	// Returns true iff "ext" matches, ignoring case, one of the extensions that ImageReader::Get dispatches on
+	static const char* const Extensions[] = { "jpeg" , "jpg" , "png" , "iGrid" };
+	if( !ext ) return false;	// [FIX] A missing extension is simply not valid; it must not reach the string comparison
+	// [FIX] Test the compiler-defined _WIN32 / _WIN64 (as FileNameParser does) instead of WIN32, which exists only if a header or the build defines it
+	for( unsigned int i=0 ; i<sizeof(Extensions)/sizeof(Extensions[0]) ; i++ )
+#if defined( _WIN32 ) || defined( _WIN64 )
+		if( !_stricmp( ext , Extensions[i] ) ) return true;
+#else // !_WIN
+		if( !strcasecmp( ext , Extensions[i] ) ) return true;
+#endif // _WIN
+	return false;
+}
+
+inline ImageReader* ImageReader::Get( const char* fileName )
+{	// Constructs the reader matching the (case-insensitive) extension of "fileName"; the caller owns the result and must delete it
+	unsigned int width , height , channels;
+	ImageReader* reader = NULL;
+	char* ext = FileNameParser::Extension( fileName );
+#if defined( _WIN32 ) || defined( _WIN64 )	// [FIX] was WIN32, which is not compiler-defined; FileNameParser already tests _WIN32 / _WIN64
+	if     ( !_stricmp( ext , "jpeg" ) || !_stricmp( ext , "jpg" ) ) reader = new       JPEGReader( fileName , width , height , channels );
+	else if( !_stricmp( ext , "png" )                              ) reader = new        PNGReader( fileName , width , height , channels );
+	else if( !_stricmp( ext , "iGrid" )                            ) reader = new TiledImageReader( fileName , width , height , channels );
+#else // !_WIN
+	if( !strcasecmp( ext , "jpeg" ) || !strcasecmp( ext , "jpg" ) ) reader = new       JPEGReader( fileName , width , height , channels );
+	else if( !strcasecmp( ext , "png" )                           ) reader = new        PNGReader( fileName , width , height , channels );
+	else if( !strcasecmp( ext , "iGrid" )                         ) reader = new TiledImageReader( fileName , width , height , channels );
+#endif // _WIN
+	else
+	{
+		delete[] ext;	// Released before throwing so that the extension string is not leaked
+		THROW( "failed to get image reader for: %s" , fileName );
+	}
+	reader->_width = width;	// Dimensions as reported by the reader's constructor through its reference parameters
+	reader->_height = height;
+	reader->_channels = channels;
+
+	delete[] ext;
+	return reader;
+}
+inline void ImageReader::GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{	// Reports the dimensions of the image in "fileName" by delegating to the GetInfo of the reader matching its (case-insensitive) extension
+	char* ext = FileNameParser::Extension( fileName );
+#if defined( _WIN32 ) || defined( _WIN64 )	// [FIX] was WIN32, which is not compiler-defined; FileNameParser already tests _WIN32 / _WIN64
+	const bool isJPEG = !_stricmp( ext , "jpeg" ) || !_stricmp( ext , "jpg" ) , isPNG = !_stricmp( ext , "png" ) , isGrid = !_stricmp( ext , "iGrid" );
+#else // !_WIN
+	const bool isJPEG = !strcasecmp( ext , "jpeg" ) || !strcasecmp( ext , "jpg" ) , isPNG = !strcasecmp( ext , "png" ) , isGrid = !strcasecmp( ext , "iGrid" );
+#endif // _WIN
+	delete[] ext;	// Only needed for the classification above; released first so that it is not leaked by the throw below
+	if     ( isJPEG )       JPEGReader::GetInfo( fileName , width , height , channels );
+	else if( isPNG  )        PNGReader::GetInfo( fileName , width , height , channels );
+	else if( isGrid ) TiledImageReader::GetInfo( fileName , width , height , channels );
+	else THROW( "failed to get image info for: %s" , fileName );	// [FIX] An unknown extension used to return silently with all three outputs unset; fail like Get() does
+}
+
+inline bool ImageWriter::ValidExtension( const char *ext )
+{	// Returns true iff "ext" matches, ignoring case, one of the extensions that ImageWriter::Get dispatches on
+	static const char* const Extensions[] = { "jpeg" , "jpg" , "png" ,
+#ifdef SUPPORT_TILES
+		"iGrid" ,
+#endif // SUPPORT_TILES
+	};
+	if( !ext ) return false;	// [FIX] A missing extension is simply not valid; it must not reach the string comparison
+	// [FIX] Test the compiler-defined _WIN32 / _WIN64 (as FileNameParser does) instead of WIN32, which exists only if a header or the build defines it
+	for( unsigned int i=0 ; i<sizeof(Extensions)/sizeof(Extensions[0]) ; i++ )
+#if defined( _WIN32 ) || defined( _WIN64 )
+		if( !_stricmp( ext , Extensions[i] ) ) return true;
+#else // !_WIN
+		if( !strcasecmp( ext , Extensions[i] ) ) return true;
+#endif // _WIN
+	return false;
+}
+
+inline ImageWriter* ImageWriter::Get( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , ImageWriterParams params )
+{	// Constructs the writer matching the (case-insensitive) extension of "fileName"; the caller owns the result and must delete it
+	ImageWriter* writer = NULL;
+	char* ext = FileNameParser::Extension( fileName );
+#if defined( _WIN32 ) || defined( _WIN64 )	// [FIX] was WIN32, which is not compiler-defined; FileNameParser already tests _WIN32 / _WIN64
+	if( !_stricmp( ext , "jpeg" ) || !_stricmp( ext , "jpg" ) ) writer = new JPEGWriter( fileName , width , height , channels , params.quality );
+	else if( !_stricmp( ext , "png" ) ) writer = new PNGWriter( fileName , width , height , channels , params.quality );
+#ifdef SUPPORT_TILES
+	else if( !_stricmp( ext , "iGrid" ) ) writer = new TiledImageWriter( fileName , width , height , channels , params );
+#endif // SUPPORT_TILES
+#else // !_WIN
+	if( !strcasecmp( ext , "jpeg" ) || !strcasecmp( ext , "jpg" ) ) writer = new JPEGWriter( fileName , width , height , channels , params.quality );
+	else if( !strcasecmp( ext , "png" ) ) writer = new PNGWriter( fileName , width , height , channels , params.quality );
+#ifdef SUPPORT_TILES
+	else if( !strcasecmp( ext , "iGrid" ) ) writer = new TiledImageWriter( fileName , width , height , channels , params );
+#endif // SUPPORT_TILES
+#endif // _WIN
+	else
+	{
+		delete[] ext;	// Released before throwing so that the extension string is not leaked
+		THROW( "failed to get image writer for: %s" , fileName );
+	}
+	writer->_width = width;	// The requested dimensions are recorded on the base class for the width() / height() / channels() accessors
+	writer->_height = height;
+	writer->_channels = channels;
+
+	delete[] ext;
+	return writer;
+}
+
+#ifdef SUPPORT_TILES
+inline bool TiledImageReader::GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{	// Reads the tile grid listed in "fileName" and reports the size and channel count of the assembled image; returns false if the file cannot be opened ([FIX] now inline, like the other definitions in this header)
+	FILE* fp = fopen( fileName , "r" );
+	if( !fp ){ WARN( "Couldn't open file for reading: %s" , fileName ) ; return false; }
+	char* fileDir = FileNameParser::Dir( fileName );	// [FIX] Allocated only after the open check, so the early return no longer leaks it
+	unsigned int *_tileHeights , *_tileWidths;
+	unsigned int _tileRows , _tileColumns , _channels = 0;
+	{
+		char line[1024];
+		if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed to read column line from: %s" , fileName );
+		line[ strcspn( line , "\r\n" ) ] = 0;	// [FIX] Strip only an actual line terminator: the old code always dropped the last character, even when it was part of the text
+		if( sscanf( line , "Columns: %u" , &_tileColumns )!=1 ) ERROR_OUT( "Failed to read column count from: %s (%s)" , fileName , line );	// [FIX] %u matches the unsigned destination
+		if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed to read row line from: %s" , fileName );
+		line[ strcspn( line , "\r\n" ) ] = 0;
+		if( sscanf( line , "Rows: %u" , &_tileRows )!=1 ) ERROR_OUT( "Failed to read row count from: %s (%s)" , fileName , line );
+		_tileHeights = new unsigned int[ _tileRows+1 ];	// Entry r+1 first receives the height of tile row r and is turned into a cumulative offset below
+		_tileWidths  = new unsigned int[ _tileColumns+1 ];
+
+		char tileName[1024];
+		for( unsigned int r=0 ; r<_tileRows ; r++ ) for( unsigned int c=0 ; c<_tileColumns ; c++ )
+		{
+			if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed to read tile name from: %s" , fileName );
+			line[ strcspn( line , "\r\n" ) ] = 0;
+			if( fileDir ) snprintf( tileName , sizeof(tileName) , "%s%c%s" , fileDir , FileNameParser::Separator , line );	// [FIX] Bounded: directory plus line can be longer than the buffer
+			else          snprintf( tileName , sizeof(tileName) , "%s" , line );
+			unsigned int _w , _h , _c;
+			ImageReader::GetInfo( tileName , _w , _h , _c );
+			if( !r && !c ) _channels = _c;
+			else if( _channels!=_c ) ERROR_OUT( "Number of color channels don't match: %d != %d" , _channels , _c );
+			if( !r ) _tileWidths[c+1] = _w;
+			else if( _tileWidths[c+1]!=_w ) ERROR_OUT( "Images in the same column must have the same width: %d != %d" , _tileWidths[c+1] , _w );
+			if( !c ) _tileHeights[r+1] = _h;
+			else if( _tileHeights[r+1]!=_h ) ERROR_OUT( "Images in the same row must have the same heights: %d != %d" , _tileHeights[r+1] , _h );
+		}
+	}
+	fclose( fp );
+	delete[] fileDir;
+	_tileWidths[0] = _tileHeights[0] = 0;
+	for( unsigned int c=0 ; c<_tileColumns ; c++ ) _tileWidths[c+1] += _tileWidths[c];
+	for( unsigned int r=0 ; r<_tileRows ; r++ ) _tileHeights[r+1] += _tileHeights[r];
+	width = _tileWidths[_tileColumns] , height = _tileHeights[_tileRows] , channels = _channels;
+	delete[] _tileWidths , delete[] _tileHeights;	// [FIX] Both arrays used to be leaked on every call
+	return true;
+}
+
+inline TiledImageReader::TiledImageReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{	// Parses the tile grid listed in "fileName", validates that the tiles agree in size and channel count, and reports the assembled image's dimensions ([FIX] now inline, like the other definitions in this header)
+	char* fileDir = FileNameParser::Dir( fileName );
+	FILE* fp = fopen( fileName , "r" );
+	if( !fp ) ERROR_OUT( "Couldn't open file for reading: %s" , fileName );
+	{
+		char line[1024];
+		if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed read column line from: %s" , fileName );
+		line[ strcspn( line , "\r\n" ) ] = 0;	// [FIX] Strip only an actual line terminator: the old code always dropped the last character, even when it was part of the text
+		if( sscanf( line , "Columns: %u" , &_tileColumns )!=1 ) ERROR_OUT( "Failed to read column count from: %s (%s)" , fileName , line );	// [FIX] %u matches the unsigned destination
+		if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed read row line from: %s" , fileName );
+		line[ strcspn( line , "\r\n" ) ] = 0;
+		if( sscanf( line , "Rows: %u" , &_tileRows )!=1 ) ERROR_OUT( "Failed to read row count from: %s (%s)" , fileName , line );
+
+		_tileReaders = new ImageReader*[ _tileColumns ];	// Only one tile row is open at a time, hence one slot per column
+		_tileHeights = new unsigned int[ _tileRows+1 ];
+		_tileWidths  = new unsigned int[ _tileColumns+1 ];
+
+		_tileNames = new char*[ _tileColumns * _tileRows ];
+		char tileName[1024];
+		for( unsigned int r=0 ; r<_tileRows ; r++ ) for( unsigned int c=0 ; c<_tileColumns ; c++ )
+		{
+			if( !fgets( line , 1024 , fp ) ) ERROR_OUT( "Failed to read tile name from: %s" , fileName );
+			line[ strcspn( line , "\r\n" ) ] = 0;
+			if( fileDir ) snprintf( tileName , sizeof(tileName) , "%s%c%s" , fileDir , FileNameParser::Separator , line );	// [FIX] Bounded: directory plus line can be longer than the buffer
+			else          snprintf( tileName , sizeof(tileName) , "%s" , line );
+			_tileNames[r*_tileColumns+c] = new char[ strlen(tileName)+1 ];
+			strcpy( _tileNames[r*_tileColumns+c] , tileName );
+		}
+	}
+	fclose( fp );
+	if( fileDir ) delete[] fileDir;
+	for( unsigned int r=0 ; r<_tileRows ; r++ ) for( unsigned int c=0 ; c<_tileColumns ; c++ )
+	{
+		unsigned int _w , _h , _c;
+		ImageReader::GetInfo( _tileNames[r*_tileColumns+c] , _w , _h , _c );
+		if( !r && !c ) _channels = _c;
+		else if( _channels!=_c ) ERROR_OUT( "Number of color channels don't match: %d != %d" , _channels , _c );
+		if( !r ) _tileWidths[c+1] = _w;
+		else if( _tileWidths[c+1]!=_w ) ERROR_OUT( "Images in the same column must have the same width: %d != %d" , _tileWidths[c+1] , _w );
+		if( !c ) _tileHeights[r+1] = _h;
+		else if( _tileHeights[r+1]!=_h ) ERROR_OUT( "Images in the same row must have the same heights: %d != %d" , _tileHeights[r+1] , _h );
+	}
+	_tileWidths[0] = _tileHeights[0] = 0;	// From here on both arrays are cumulative: entry k is the pixel offset at which tile column / row k starts
+	for( unsigned int c=0 ; c<_tileColumns ; c++ ) _tileWidths[c+1] += _tileWidths[c];
+	for( unsigned int r=0 ; r<_tileRows ; r++ ) _tileHeights[r+1] += _tileHeights[r];
+	width = _width = _tileWidths[_tileColumns] , height = _height = _tileHeights[_tileRows] , channels = _channels;
+	_currentPixelRow = _currentTileRow = 0;
+}
+inline TiledImageReader::~TiledImageReader( void )
+{	// [FIX] Now inline (like the other definitions in this header) and releases tile readers left open when destroyed part-way through a tile row
+	if( _currentPixelRow>_tileHeights[ _currentTileRow ] ) for( unsigned int c=0 ; c<_tileColumns ; c++ ) delete _tileReaders[c];	// nextRow keeps the readers alive exactly while the current tile row is partially read
+	delete[] _tileReaders;
+	for( unsigned int i=0 ; i<_tileColumns*_tileRows ; i++ ) delete[] _tileNames[i];
+	delete[] _tileNames;
+	delete[] _tileWidths , delete[] _tileHeights;
+}
+inline unsigned TiledImageReader::nextRow( unsigned char* row )
+{	// Assembles the next full-width image row from the tiles of the current tile row and returns its index ([FIX] now inline, like the other definitions in this header)
+	// If it's the first row, set up the readers
+	if( _currentPixelRow==_tileHeights[ _currentTileRow ] ) for( unsigned int c=0 ; c<_tileColumns ; c++ ) _tileReaders[c] = ImageReader::Get( _tileNames[ _currentTileRow * _tileColumns + c ] );
+	// [FIX] _tileWidths is cumulative, so _tileWidths[c] already is the starting pixel of column c; the old extra factor of c misplaced every column past the second
+	// Read the row fragments
+	for( unsigned int c=0 ; c<_tileColumns ; c++ ) _tileReaders[c]->nextRow( row + (size_t)_tileWidths[c] * _channels );
+
+	// If it's the last row of the tile, free up the readers
+	if( _currentPixelRow==_tileHeights[_currentTileRow+1]-1 )
+	{
+		for( unsigned int c=0 ; c<_tileColumns ; c++ ) delete _tileReaders[c];
+		_currentTileRow++;
+	}
+
+	return _currentPixelRow++;
+}
+
+inline TiledImageWriter::TiledImageWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , ImageWriterParams params )
+{	// Chooses the tile grid for a width x height image, names the tiles and writes the text file listing them ([FIX] now inline, like the other definitions in this header)
+	_width = width , _height = height , _channels = channels , _tileWidth = params.tileWidth , _tileHeight = params.tileHeight;
+	_tileColumns = ( _width + ( _tileWidth-1 ) ) / _tileWidth , _tileRows = ( _height + ( _tileHeight-1 ) ) / _tileHeight;	// Rounded up so that a partial last column / row still gets a tile
+	_tileWriters = new ImageWriter*[ _tileColumns ];
+	_tileNames = new char*[ _tileColumns * _tileRows ];
+	if( params.tileParams ) _params = *params.tileParams;	// Otherwise the tiles are written with default-constructed parameters
+
+	char tileName[1024];
+	char* tileHeader = FileNameParser::Header( fileName );
+	for( unsigned int r=0 ; r<_tileRows ; r++ ) for( unsigned int c=0 ; c<_tileColumns ; c++ )
+	{
+		snprintf( tileName , sizeof(tileName) , "%s.%u.%u.%s" , tileHeader , c , r , params.tileExtension );	// [FIX] Bounded write, and %u for the unsigned indices
+		_tileNames[r*_tileColumns+c] = new char[ strlen(tileName)+1 ];
+		strcpy( _tileNames[r*_tileColumns+c] , tileName );
+	}
+	delete[] tileHeader;
+	FILE* fp = fopen( fileName , "w" );
+	if( !fp ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
+	fprintf( fp , "Columns: %u\n" , _tileColumns );	// [FIX] %u matches the unsigned arguments
+	fprintf( fp , "Rows: %u\n" , _tileRows );
+	for( unsigned int i=0 ; i<_tileRows*_tileColumns ; i++ )
+	{
+		char* localTileName = FileNameParser::Local( _tileNames[i] );	// Tiles are listed without their directory
+		fprintf( fp , "%s\n" , localTileName );
+		delete[] localTileName;
+	}
+	fclose( fp );
+	_currentPixelRow = 0;
+}
+inline TiledImageWriter::~TiledImageWriter( void )
+{	// [FIX] Now inline (like the other definitions in this header) and deletes tile writers left open when destroyed part-way through a tile row
+	if( ( _currentPixelRow % _tileHeight )!=0 && _currentPixelRow<_height ) for( unsigned int c=0 ; c<_tileColumns ; c++ ) delete _tileWriters[c];	// nextRow keeps the writers alive exactly while a tile row is partially written
+	for( unsigned int i=0 ; i<_tileColumns*_tileRows ; i++ ) delete[] _tileNames[i];
+	delete[] _tileWriters , delete[] _tileNames;
+}
+inline unsigned int TiledImageWriter::nextRow( const unsigned char* row )
+{	// Hands each tile column its fragment of the next full-width row and returns that row's index ([FIX] now inline, like the other definitions in this header)
+	unsigned int r = _currentPixelRow / _tileHeight;
+	if( ( _currentPixelRow % _tileHeight )==0 )	// First row of a tile row: create its writers (the last column / row may be smaller than the nominal tile size)
+	{
+		for( unsigned int c=0 ; c<_tileColumns ; c++ )
+			_tileWriters[c] = ImageWriter::Get( _tileNames[ r * _tileColumns + c ] , std::min< unsigned int >( _tileWidth , _width - _tileWidth*c ) , std::min< unsigned int >( _tileHeight , _height - _tileHeight*r ) , _channels , _params );
+	}
+	for( int c=0 ; c<(int)_tileColumns ; c++ ) _tileWriters[c]->nextRow( row + (size_t)c * _tileWidth * _channels );	// size_t arithmetic so that the byte offset cannot wrap around unsigned int
+	if( ( _currentPixelRow % _tileHeight )==( _tileHeight-1 ) || _currentPixelRow==(_height-1) ) for( unsigned int c=0 ; c<_tileColumns ; c++ ) delete _tileWriters[c];
+
+	return _currentPixelRow++;
+}
+#endif // SUPPORT_TILES
+
+#endif // IMAGE_INCLUDED
diff --git a/Src/ImageStitching.cpp b/Src/ImageStitching.cpp
new file mode 100644
index 0000000..c7eca96
--- /dev/null
+++ b/Src/ImageStitching.cpp
@@ -0,0 +1,541 @@
+/*
+Copyright (c) 2013, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#undef ARRAY_DEBUG
+#undef FAST_COMPILE
+#undef USE_DOUBLE
+#define DIMENSION 2
+#define USE_DEEP_TREE_NODES
+#define ROW_BLOCK_SIZE 16
+#define DEFAULT_FEM_DEGREE 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <float.h>
+#include <algorithm>
+#include "Image.h"
+#include "MyMiscellany.h"
+#include "Array.h"
+#include "CmdLineParser.h"
+#include "Geometry.h"
+#include "FEMTree.h"
+
+cmdLineParameterArray< char* , 2 >
+	In( "in" );	// Two input file names: In.values[0] is opened as the pixel image and In.values[1] as the label image
+cmdLineParameter< char* >
+	Out( "out" );	// Name of the stitched output image
+cmdLineParameter< int >
+#ifdef FAST_COMPILE
+#else // !FAST_COMPILE
+	Degree( "degree" , DEFAULT_FEM_DEGREE ) ,
+#endif // FAST_COMPILE
+	Threads( "threads" , omp_get_num_procs() ) ,
+	MaxMemoryGB( "maxMemory" , 0 ) ,
+	GSIterations( "iters" , 8 ) ,
+	FullDepth( "fullDepth" , 6 ) ,
+	BaseDepth( "baseDepth" , 6 ) ,
+	BaseVCycles( "baseVCycles" , 4 );
+cmdLineReadable
+	Verbose( "verbose" ) ,
+	ShowResidual( "residual" ) ,
+	Performance( "performance" );
+cmdLineParameter< float >
+	WeightScale   ( "wScl", 0.125f ) ,
+	WeightExponent( "wExp" , 6.f );
+
+cmdLineReadable* params[] =	// NULL-terminated list of every command-line option declared above
+{
+	&In , &Out , &Threads , &Verbose , &ShowResidual , &GSIterations , &FullDepth ,
+	&BaseDepth , &BaseVCycles ,
+	&WeightScale , &WeightExponent ,
+	&Performance ,
+	&MaxMemoryGB ,
+#if !defined( FAST_COMPILE )
+	&Degree , 
+#endif // !FAST_COMPILE
+	NULL
+};
+
+void ShowUsage( char* ex )
+{	// Prints the command-line usage for executable name "ex"; each optional parameter is shown with the value it currently holds
+	printf( "Usage: %s\n" , ex );
+	printf( "\t --%s <input color / labels>\n" , In.name );	// The only option listed without brackets
+	printf( "\t[--%s <ouput stitched image>]\n" , Out.name );
+#if !defined( FAST_COMPILE )
+	printf( "\t[--%s <b-spline degree>=%d]\n" , Degree.name , Degree.value );	// Degree is only declared when FAST_COMPILE is not defined
+#endif // !FAST_COMPILE
+	printf( "\t[--%s <GS iterations>=%d]\n" , GSIterations.name , GSIterations.value );
+	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
+	printf( "\t[--%s <parallelization threads>=%d]\n" , Threads.name , Threads.value );
+	printf( "\t[--%s <successive under-relaxation scale>=%f]\n", WeightScale.name , WeightScale.value );
+	printf( "\t[--%s <successive under-relaxation exponent>=%f]\n", WeightExponent.name , WeightExponent.value );
+	printf( "\t[--%s <maximum memory (in GB)>=%d]\n" , MaxMemoryGB.name , MaxMemoryGB.value );
+	printf( "\t[--%s]\n" , Performance.name );
+	printf( "\t[--%s <coarse MG solver depth>=%d]\n" , BaseDepth.name , BaseDepth.value );
+	printf( "\t[--%s <coarse MG solver v-cycles>=%d]\n" , BaseVCycles.name , BaseVCycles.value );
+	printf( "\t[--%s]\n" , ShowResidual.name );
+	printf( "\t[--%s]\n" , Verbose.name );
+}
+
+struct RGBPixel	// Three 8-bit channels, stored in the order red, green, blue
+{
+	unsigned char rgb[3];
+	unsigned char& operator[]( int idx ){ return rgb[idx]; }
+	const unsigned char& operator[]( int idx ) const { return rgb[idx]; }
+	// Packs the channels into a single integer label (0xRRGGBB); pure white is reported as -1 instead
+	int mask( void ) const
+	{
+		const bool white = rgb[0]==255 && rgb[1]==255 && rgb[2]==255;
+		return white ? -1 : ( (int)rgb[0]<<16 | (int)rgb[1]<<8 | (int)rgb[2] );
+	}
+
+	RGBPixel( void ){ for( int c=0 ; c<3 ; c++ ) rgb[c] = 0; }
+	// The two constructors below scale each channel by 255, truncate to int and clamp to [0,255]
+	RGBPixel( double r , double g , double b )
+	{
+		const double channel[3] = { r , g , b };
+		for( int c=0 ; c<3 ; c++ ) rgb[c] = (unsigned char)std::max< int >( 0 , std::min< int >( 255 , (int)( channel[c]*255 ) ) );
+	}
+	RGBPixel( float r , float g , float b )
+	{
+		const float channel[3] = { r , g , b };
+		for( int c=0 ; c<3 ; c++ ) rgb[c] = (unsigned char)std::max< int >( 0 , std::min< int >( 255 , (int)( channel[c]*255 ) ) );
+	}
+
+	template< class Real >
+	static Point< Real , 3 > ToPoint( RGBPixel rgb )
+	{
+		Point< Real , 3 > color;	// Each channel divided by 255
+		for( int c=0 ; c<3 ; c++ ) color[c] = (Real)( ( (double)rgb[c] ) / 255. );
+		return color;
+	}
+};
+
+void WriteImage( char* fileName , RGBPixel* pixels , int w , int h )
+{	// Writes a w x h array of RGBPixel as a three-channel image through ImageWriter::Write
+	const unsigned char* bytes = (const unsigned char*)pixels;
+	ImageWriter::Write( fileName , bytes , (unsigned int)w , (unsigned int)h , 3u );
+}
+
+struct Profiler	// Reports the time elapsed since construction together with the peak memory usage
+{
+	double t;	// Value of Time() when the profiler was constructed
+	Profiler( void ) : t( Time() ){ }
+	void print( bool newLine=false ) const
+	{	// Prints the elapsed time and the peak memory usage, optionally followed by a line break
+		printf( "%.2f (s) ; %d (MB)" , Time()-t , MemoryInfo::PeakMemoryUsageMB() );
+		if( newLine ) printf( "\n" );
+	}
+};
+
+template< unsigned int Colors >
+void ReadAndWrite( ImageReader* pixels , ImageReader* labels , ImageWriter* output )
+{	// Streams both inputs row by row and writes the pixel rows unchanged to the output; the label rows are read and then discarded
+	const unsigned int rowCount = pixels->height();
+	RGBPixel* colorRow = new RGBPixel[ pixels->width() ];
+	RGBPixel* tagRow = new RGBPixel[ labels->width() ];
+	for( unsigned int row=0 ; row<rowCount ; row++ )
+	{
+		if( Verbose.set ) printf( "%d / %d       \r" , row , rowCount );
+		pixels->nextRow( (unsigned char*)colorRow );
+		labels->nextRow( (unsigned char*)tagRow );
+		output->nextRow( (unsigned char*)colorRow );
+	}
+	if( Verbose.set ) printf( "\n" );
+	delete[] colorRow , delete[] tagRow;
+}
+
+template< class Real , unsigned int Colors >
+struct BufferedImageDerivativeStream : public FEMTreeInitializer< DIMENSION , Real >::template DerivativeStream< Point< Real , Colors > >	// Streams pixel differences across label boundaries, reading both images row by row into a three-row rolling buffer
+{
+	BufferedImageDerivativeStream( const unsigned int resolution[] , ImageReader* pixels , ImageReader* labels ) : _pixels( pixels ) , _labels( labels )
+	{	// The readers are borrowed, not owned: the destructor does not delete them
+		memcpy( _resolution , resolution , sizeof( unsigned int ) * DIMENSION );
+		for( int i=0 ; i<3 ; i++ )
+		{
+			_pixelRows[i] = new RGBPixel[ _resolution[0] ];
+			_labelRows[i] = new RGBPixel[ _resolution[0] ];
+			_maskRows [i] = new      int[ _resolution[0] ];
+		}
+		if( pixels->channels()!=3 && pixels->channels()!=1 ) ERROR_OUT( "Pixel input must have 1 or 3 channels: %d" , pixels->channels() );
+		if( labels->channels()!=3 && labels->channels()!=1 ) ERROR_OUT( "Label input must have 1 or 3 channels: %d" , labels->channels() );
+		__pixelRow = pixels->channels()==3 ? NULL : new unsigned char[ _resolution[0] ];	// Scratch rows, only needed to expand single-channel input to RGB
+		__labelRow = labels->channels()==3 ? NULL : new unsigned char[ _resolution[0] ];
+		_r = -2 ; prefetch();	// prefetch() loads image row _r+2, so this loads row 0 ...
+		_r = -1 ; prefetch();	// ... and this loads row 1 (when the image has that many rows)
+		_c = _r = _dir = 0;
+	}
+	~BufferedImageDerivativeStream( void )
+	{
+		for( int i=0 ; i<3 ; i++ ) delete[] _pixelRows[i] , delete[] _labelRows[i] , delete[] _maskRows[i];
+		if( __pixelRow ) delete[] __pixelRow;
+		if( __labelRow ) delete[] __labelRow;
+	}
+	void resolution( unsigned int res[] ) const { memcpy( res , _resolution , sizeof(_resolution) ); }
+	// advance() moves on to the next image row and restarts the scan at column 0 in direction 0
+	void advance( void ){ _c = _dir = 0 , _r++; }
+	void prefetch( void )
+	{	// Loads image row _r+2 (if it exists) into buffer slot (_r+2)%3 and computes its label masks
+		if( _r+2<(int)_resolution[1] )
+		{
+			int j = (_r+2)%3;
+			RGBPixel *pixelRow = _pixelRows[j] , *labelRow = _labelRows[j];
+			int *maskRow = _maskRows[j];
+			if( _pixels->channels()==3 ) _pixels->nextRow( (unsigned char*)pixelRow );
+			else
+			{
+				_pixels->nextRow( __pixelRow );
+				for( int i=0 ; i<(int)_resolution[0] ; i++ ) pixelRow[i][0] = pixelRow[i][1] = pixelRow[i][2] = __pixelRow[i];	// Replicate the single channel into all three
+			}
+			if( _labels->channels()==3 ) _labels->nextRow( (unsigned char*)labelRow );
+			else
+			{
+				_labels->nextRow( __labelRow );
+				for( int i=0 ; i<(int)_resolution[0] ; i++ ) labelRow[i][0] = labelRow[i][1] = labelRow[i][2] = __labelRow[i];
+			}
+#pragma omp parallel for
+			for( int i=0 ; i<(int)_resolution[0] ; i++ ) maskRow[i] = labelRow[i].mask();	// mask() is -1 for a pure-white label and the packed color otherwise
+		}
+	}
+	// Finds the next neighboring pixel pair of the current row whose (non-negative) masks differ; returns false once the row is exhausted
+	bool nextDerivative( unsigned int idx[] , unsigned int& dir , Point< Real , Colors >& dValue )
+	{
+		const RGBPixel *pixelRow1 = _pixelRows[_r%3] , *pixelRow2 = _pixelRows[(_r+1)%3];	// Row 1 is the current image row, row 2 the one after it
+		const int *maskRow1 = _maskRows[_r%3] , *maskRow2 = _maskRows[(_r+1)%3];
+		if( _dir==0 )	// Direction 0: pairs (_c , _c+1) within the current row
+		{
+			for( ; _c<(int)_resolution[0]-1 ; _c++ )
+			{
+				if( maskRow1[_c]!=maskRow1[_c+1] && maskRow1[_c]>=0 && maskRow1[_c+1]>=0 )
+				{
+					idx[0] = _c , idx[1] = _r , dir = _dir;
+					dValue = RGBPixel::ToPoint< Real >( pixelRow1[_c+1] ) - RGBPixel::ToPoint< Real >( pixelRow1[_c] );
+					_c++;	// Resume after this pair on the next call
+					return true;
+				}
+			}
+			_dir = 1 , _c = 0;
+		}
+		if( _dir==1 )	// Direction 1: pairs made of column _c in the current row and in the next row
+		{
+			if( _r+1<(int)_resolution[1] )
+			{
+				for( ; _c<(int)_resolution[0] ; _c++ )
+				{
+					if( maskRow1[_c]!=maskRow2[_c] && maskRow1[_c]>=0 && maskRow2[_c]>=0 )
+					{
+						idx[0] = _c , idx[1] = _r , dir = _dir;
+						dValue = RGBPixel::ToPoint< Real >( pixelRow2[_c] ) - RGBPixel::ToPoint< Real >( pixelRow1[_c] );
+						_c++;
+						return true;
+					}
+				}
+			}
+		}
+		return false;
+	}
+protected:
+	int _r , _c , _dir;	// Current row, next column to examine, and current direction (0 or 1)
+	unsigned int _resolution[DIMENSION];	// Indexed as [0] = row length and [1] = number of rows
+	ImageReader *_pixels , *_labels;
+	RGBPixel *_pixelRows[3] , *_labelRows[3];	// Rolling buffers: image row k lives in slot k%3
+	unsigned char *__pixelRow , *__labelRow;
+	int* _maskRows[3];
+};
+
+// Stitching driver for a fixed B-spline degree.
+// Reads the pixel image (In.values[0]) and the label image (In.values[1]), builds a FEMTree from the streamed
+// image derivatives, assembles the partial-x / partial-y constraints, solves the system and, if Out is set,
+// writes the input pixels plus the (average-subtracted) solution to the output image.
+// Template parameters:
+//   Real:   floating point type used for the tree and the solve
+//   Degree: B-spline degree used to build the FEM signature (with BOUNDARY_NEUMANN)
+template< typename Real , unsigned int Degree >
+void _Execute( void )
+{
+	// Query both images and require identical dimensions
+	int w , h;
+	{
+		unsigned int _w , _h , _c;
+		ImageReader::GetInfo( In.values[0] , _w , _h , _c );
+		w = _w , h = _h;
+		ImageReader::GetInfo( In.values[1] , _w , _h , _c );
+		// NOTE(review): compares signed (w,h) against unsigned (_w,_h)
+		if( w!=_w || h!=_h ) ERROR_OUT( "Pixel and label dimensions don't match: %d x %d != %d x %d" , _w , _h , w , h );
+	}
+	if( Verbose.set ) printf( "Resolution: %d x %d\n" , w , h );
+
+	static const unsigned int Dim = DIMENSION;
+	static const unsigned int Colors = 3;
+	static const unsigned int FEMSig = FEMDegreeAndBType< Degree , BOUNDARY_NEUMANN >::Signature;
+
+	FEMTree< Dim , Real > tree( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	// derivatives[0] / derivatives[1] are consumed below as the partial-x / partial-y samples
+	std::vector< NodeSample< Dim , Point< Real , Colors > > > derivatives[Dim];
+	int maxDepth;
+	DenseNodeData< Point< Real , Colors > , IsotropicUIntPack< Dim , FEMSig > > constraints;
+	DenseNodeData< Point< Real , Colors > , IsotropicUIntPack< Dim , FEMSig > > solution;
+	// Build the tree, one image row at a time
+	{
+		Profiler p;
+		ImageReader* pixels = ImageReader::Get( In.values[0] );
+		ImageReader* labels = ImageReader::Get( In.values[1] );
+		unsigned int resolution[] = { (unsigned int )w , (unsigned int )h };
+		BufferedImageDerivativeStream< Real , Colors > dStream( resolution , pixels , labels );
+		for( int j=0 ; j<h ; j++ )
+		{
+			// Run the stream's prefetch concurrently with the tree initialization for the current row
+#pragma omp parallel sections
+			{
+#pragma omp section
+				{
+					dStream.prefetch();
+				}
+#pragma omp section
+				{
+					maxDepth = FEMTreeInitializer< Dim , Real >::template Initialize< (Degree&1)==0 , Point< Real , Colors > >( tree.spaceRoot() , dStream , derivatives , tree.nodeAllocator , tree.initializer() );
+				}
+			}
+			dStream.advance();
+		}
+		delete pixels;
+		delete labels;
+		// Thicken the tree around every node that carries a derivative sample
+		{
+			std::vector< typename FEMTree< Dim , Real >::FEMTreeNode* > nodes;
+			nodes.reserve( derivatives[0].size() + derivatives[1].size() );
+			for( int i=0 ; i<derivatives[0].size() ; i++ ) nodes.push_back( derivatives[0][i].node );
+			for( int i=0 ; i<derivatives[1].size() ; i++ ) nodes.push_back( derivatives[1][i].node );
+			// NOTE(review): &nodes[0] is taken even if no samples were collected (empty vector) -- confirm this cannot happen
+			tree.template thicken< 1 , 0 >( &nodes[0] , (int)nodes.size() );
+		}
+		// The predicate accepts every node
+		tree.template finalizeForMultigrid< Degree >( FullDepth.value , []( const RegularTreeNode< Dim , FEMTreeNodeData >* ){ return true; } );
+		if( Verbose.set )
+		{
+			printf( "Valid FEM Nodes / Edges: %d %d\n" , (int)tree.validFEMNodes( IsotropicUIntPack< Dim , FEMSig >() ) , (int)( derivatives[0].size() + derivatives[1].size() ) );
+			printf( "Set tree [%d]: " , maxDepth ) , p.print( true );
+		}
+	}
+
+	// Assemble the constraints from the derivative samples
+	{
+		Profiler p;
+		constraints = tree.template initDenseNodeData< Point< Real , Colors > >( IsotropicUIntPack< Dim , FEMSig >() );
+		static const unsigned int DFEMSig = FEMSignature< FEMSig >::DSignature();
+		// Generate the partial-x constraints
+		{
+			typedef UIntPack< DFEMSig , FEMSig > CSignature;
+			typedef IsotropicUIntPack< 2 , 0 > CDerivative;
+			typedef UIntPack< FEMSig , FEMSig > FEMSignature;
+			typedef UIntPack< 1 , 0 > FEMDerivative;
+			SparseNodeData< Point< Real , Colors > , CSignature > partialX;
+			// Samples are negated and scaled by 2^maxDepth
+			for( int i=0 ; i<derivatives[0].size() ; i++ ) partialX[ derivatives[0][i].node ] = -derivatives[0][i].data * (1<<maxDepth);
+
+			unsigned int derivatives1[] = { 1 , 0 } , derivatives2[] = { 0 , 0 };
+			typename FEMIntegrator::template Constraint< FEMSignature , FEMDerivative , CSignature , CDerivative , 1 > F;
+			// Single unit weight pairing the x-derivative of the FEM function with the value of the constraint function
+			F.weights[0][ TensorDerivatives< FEMDerivative >::Index( derivatives1 ) ][ TensorDerivatives< CDerivative >::Index( derivatives2 ) ] = 1;
+			tree.addFEMConstraints( F , partialX , constraints , maxDepth );
+		}
+		// Generate the partial-y constraints
+		{
+			typedef UIntPack< FEMSig , DFEMSig > CSignature;
+			typedef IsotropicUIntPack< 2 , 0 > CDerivative;
+			typedef UIntPack< FEMSig , FEMSig > FEMSignature;
+			typedef UIntPack< 0 , 1 > FEMDerivative;
+			SparseNodeData< Point< Real , Colors > , CSignature > partialY;
+			// Samples are negated and scaled by 2^maxDepth
+			for( int i=0 ; i<derivatives[1].size() ; i++ ) partialY[ derivatives[1][i].node ] = -derivatives[1][i].data * (1<<maxDepth);
+
+			unsigned int derivatives1[] = { 0 , 1 } , derivatives2[] = { 0 , 0 };
+			typename FEMIntegrator::template Constraint< FEMSignature , FEMDerivative , CSignature , CDerivative , 1 > F;
+			// Single unit weight pairing the y-derivative of the FEM function with the value of the constraint function
+			F.weights[0][ TensorDerivatives< FEMDerivative >::Index( derivatives1 ) ][ TensorDerivatives< CDerivative >::Index( derivatives2 ) ] = 1;
+			tree.addFEMConstraints( F , partialY , constraints , maxDepth );
+		}
+		if( Verbose.set ) printf( "Set constraints: " ) , p.print( true );
+	}
+	// Solve the system
+	{
+		Profiler p;
+		// NOTE(review): t is never read in this scope
+		double t = Time();
+		solution = tree.template initDenseNodeData< Point< Real , Colors > >( IsotropicUIntPack< Dim , FEMSig >() );
+		typename FEMTree< Dim , Real >::SolverInfo sInfo;
+		sInfo.cgDepth = 0 , sInfo.cascadic = false , sInfo.vCycles = 1 , sInfo.cgAccuracy = 0 , sInfo.verbose = Verbose.set , sInfo.showResidual = ShowResidual.set , sInfo.showGlobalResidual = false , sInfo.sliceBlockSize = ROW_BLOCK_SIZE;
+		sInfo.baseDepth = BaseDepth.value , sInfo.baseVCycles = BaseVCycles.value;
+		sInfo.iters = GSIterations.value;
+
+		sInfo.useSupportWeights = true;
+		// Maps a weight w to WeightScale * w^WeightExponent; the second argument is ignored
+		sInfo.sorRestrictionFunction = [&] ( Real w , Real ){ return (Real)( WeightScale.value * pow( w , WeightExponent.value ) ); };
+		sInfo.wCycle = false;
+		typename FEMIntegrator::template System< IsotropicUIntPack< Dim , FEMSig > , IsotropicUIntPack< Dim , 1 > > F( { 0. , 1. } );
+		// NOTE(review): _constraints is initialized but never used afterwards -- confirm whether it can be dropped
+		DenseNodeData< Point< Real , Colors > , IsotropicUIntPack< Dim , FEMSig > > _constraints = tree.template initDenseNodeData< Point< Real , Colors > >( IsotropicUIntPack< Dim , FEMSig >() );
+		tree.solveSystem( IsotropicUIntPack< Dim , FEMSig >() , F , constraints , solution , Point< Real , Colors >::Dot , maxDepth , sInfo );
+		if( Verbose.set ) printf( "Solved system: " ) , p.print( true );
+	}
+
+	// Average of the solution over the image extent, expressed in units of 1/2^maxDepth
+	Point< Real , Colors > average;
+	{
+		Profiler p;
+		Real begin[] = { 0 , 0 } , end[] = { (Real)w/(1<<maxDepth) , (Real)h/(1<<maxDepth) };
+		average = tree.average( solution , begin , end );
+		if( Verbose.set ) printf( "Got average: " ) , p.print( true );
+	}
+	// Stitch the image
+	if( Out.set )
+	{
+		Profiler p;
+		int begin[2] , end[2];
+		ImageReader* in = ImageReader::Get( In.values[0] );
+		ImageWriter* out = ImageWriter::Get( Out.value , w , h , 3 );
+
+		// Two input and two output buffers, each holding one block of ROW_BLOCK_SIZE rows, selected by block parity
+		RGBPixel *inRows[2] , *outRows[2];
+		unsigned char* inRow = NULL;
+		for( int i=0 ; i<2 ; i++ ) inRows[i] = new RGBPixel[w*ROW_BLOCK_SIZE] , outRows[i] = new RGBPixel[w*ROW_BLOCK_SIZE];
+		// Single-channel input is read into a temporary row and replicated to RGB
+		if( in->channels()==1 ) inRow = new unsigned char[w];
+
+		// Reads the rows of the given block from the input image into inRows[block&1]
+		auto FetchInput = [&]( unsigned int block )
+		{
+			int rStart = block*ROW_BLOCK_SIZE;
+			int rEnd = rStart + ROW_BLOCK_SIZE < h ? rStart + ROW_BLOCK_SIZE : h;
+			for( int r=rStart , rr=0 ; r<rEnd ; r++ , rr++ )
+			{
+				if( in->channels()==3 ) in->nextRow( (unsigned char*)( inRows[block&1] + rr*w ) );
+				else
+				{
+					in->nextRow( inRow );
+					RGBPixel *_inRow = inRows[block&1] + rr*w;
+#pragma omp parallel for
+					for( int i=0 ; i<w ; i++ ) _inRow[i][0] = _inRow[i][1] = _inRow[i][2] = inRow[i];
+				}
+			}
+		};
+		// Writes the rows of the given block from outRows[block&1] to the output image
+		auto SetOutput = [&]( unsigned int block )
+		{
+			int rStart = block*ROW_BLOCK_SIZE;
+			int rEnd = rStart + ROW_BLOCK_SIZE < h ? rStart + ROW_BLOCK_SIZE : h;
+			out->nextRows( (unsigned char*)outRows[block&1] , rEnd-rStart );
+		};
+		int blockNum = ( h + ROW_BLOCK_SIZE - 1 ) / ROW_BLOCK_SIZE;
+
+		// Prefetch the first block
+		FetchInput( 0 );
+		omp_set_nested( true );
+		// Three-stage pipeline per iteration: read block+1, write block-1, compute block
+		for( int rStart=0 , block=0 ; rStart<h ; rStart+=ROW_BLOCK_SIZE , block++ )
+		{
+#pragma omp parallel sections
+			{
+#pragma omp section
+				{
+					// NOTE(review): t is never read in this section
+					double t = Time();
+					// NOTE(review): block<blockNum always holds inside this loop; on the last iteration FetchInput covers an empty row range
+					if( block<blockNum ) FetchInput( block+1 );
+				}
+#pragma omp section
+				{
+					// NOTE(review): t is never read in this section
+					double t = Time();
+					if( block>0 ) SetOutput( block-1 );
+				}
+#pragma omp section
+				{
+					// NOTE(review): t is never read in this section
+					double t = Time();
+					RGBPixel *_inRows = inRows[block&1] , *_outRows = outRows[block&1];
+					int rEnd = rStart + ROW_BLOCK_SIZE < h ? rStart + ROW_BLOCK_SIZE : h;
+
+					// Expand the next block of values
+					begin[0] = 0 , begin[1] = rStart , end[0] = w , end[1] = rEnd;
+					Pointer( Point< Real , Colors > ) outBlock = tree.template regularGridUpSample< true >( solution , begin , end );
+					int size = (rEnd-rStart)*w;
+#pragma omp parallel for
+					for( int ii=0 ; ii<size ; ii++ )
+					{
+						// Output = input (scaled by 1/255) + solution - average
+						Point< Real , Colors > c = Point< Real , Colors >( _inRows[ii][0] , _inRows[ii][1] , _inRows[ii][2] ) / 255;
+						c += outBlock[ii] - average;
+						_outRows[ii] = RGBPixel( c[0] , c[1] , c[2] );
+					}
+					DeletePointer( outBlock );
+				}
+			}
+		}
+		// Write out the last block
+		SetOutput( blockNum-1 );
+		if( Verbose.set ) printf( "Wrote output: " ) , p.print( true );
+		delete[] inRows[0];
+		delete[] outRows[0];
+		delete[] inRows[1];
+		delete[] outRows[1];
+		if( inRow ) delete[] inRow;
+		delete in;
+		delete out;
+	}
+}
+
+#ifdef FAST_COMPILE
+#else // !FAST_COMPILE
+// Maps the run-time value of the Degree parameter to the matching compile-time instantiation of the solver.
+// Only degrees 1 and 2 are instantiated (degrees 3 and 4 are intentionally disabled); anything else is an error.
+template< typename Real >
+void _Execute( void )
+{
+	if( Degree.value==1 )
+	{
+		_Execute< Real , 1 >();
+	}
+	else if( Degree.value==2 )
+	{
+		_Execute< Real , 2 >();
+	}
+	else
+	{
+		ERROR_OUT( "Only B-Splines of degree 1 - 2 are supported" );
+	}
+}
+#endif // FAST_COMPILE
+
+// Program entry point: parses the command line, applies the memory and thread settings, validates the
+// parameters and runs the stitching solver.
+// Returns EXIT_FAILURE (after showing the usage) when no input was given, EXIT_SUCCESS otherwise.
+int main( int argc , char* argv[] )
+{
+	Timer timer;
+	// Skip argv[0] when parsing the arguments
+	cmdLineParse( argc-1 , &argv[1] , params );
+	// The limit is specified in gigabytes; <<10 converts it to megabytes
+	if( MaxMemoryGB.value>0 ) SetPeakMemoryMB( MaxMemoryGB.value<<10 );
+	// Never request fewer than one thread
+	omp_set_num_threads( Threads.value > 1 ? Threads.value : 1 );
+	if( Verbose.set )
+	{
+		printf( "*********************************************\n" );
+		printf( "*********************************************\n" );
+		printf( "** Running Image Stitching (Version %s) **\n" , VERSION );
+		printf( "*********************************************\n" );
+		printf( "*********************************************\n" );
+	}
+
+	if( !In.set )
+	{
+		ShowUsage( argv[0] );
+		return EXIT_FAILURE;
+	}
+	// Clamp the base depth to the full depth, warning only if the user explicitly set it
+	if( BaseDepth.value>FullDepth.value )
+	{
+		if( BaseDepth.set ) WARN( "Base depth must be smaller than full depth: %d <= %d" , BaseDepth.value , FullDepth.value );
+		BaseDepth.value = FullDepth.value;
+	}
+
+	// Floating point precision is selected at compile time
+#ifdef USE_DOUBLE
+	typedef double Real;
+#else // !USE_DOUBLE
+	typedef float  Real;
+#endif // USE_DOUBLE
+
+#ifdef FAST_COMPILE
+	// Only a single degree is instantiated; this local Degree shadows the command-line parameter of the same name
+	static const int Degree = DEFAULT_FEM_DEGREE;
+	// NOTE(review): the message reports sizeof(DefaultFloatType), not sizeof(Real) -- confirm they agree
+	WARN( "Compiled for degree-%d, %s-precision _only_" , Degree , sizeof(DefaultFloatType)==4 ? "single" : "double" );
+	_Execute< Real , Degree >();
+#else // !FAST_COMPILE
+	// Degree is chosen at run time from the command line
+	_Execute< Real >();
+#endif // FAST_COMPILE
+
+	if( Performance.set )
+	{
+		printf( "Time (Wall/CPU): %.2f / %.2f\n" , timer.wallTime() , timer.cpuTime() );
+		printf( "Peak Memory (MB): %d\n" , MemoryInfo::PeakMemoryUsageMB() );
+	}
+	return EXIT_SUCCESS;
+}
diff --git a/Src/JPEG.h b/Src/JPEG.h
new file mode 100644
index 0000000..ed98fbe
--- /dev/null
+++ b/Src/JPEG.h
@@ -0,0 +1,52 @@
+#ifndef JPEG_INCLUDED
+#define JPEG_INCLUDED
+#include "Image.h"
+
+#include <setjmp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include "JPEG/jpeglib.h"
+#include "JPEG/jerror.h"
+#include "JPEG/jmorecfg.h"
+#else // !_WIN32
+#include <jpeglib.h>
+#include <jerror.h>
+#include <jmorecfg.h>
+#endif // _WIN32
+
+// libjpeg error manager extended with a jump buffer.
+// The standard manager must stay the first member: my_error_exit() casts the jpeg_error_mgr pointer
+// stored in the JPEG object back to a my_error_mgr pointer.
+struct my_error_mgr
+{
+	struct jpeg_error_mgr pub;    // "public" fields
+	jmp_buf setjmp_buffer;        // for return to caller
+};
+typedef struct my_error_mgr * my_error_ptr;
+
+// Row-by-row JPEG image reader built on libjpeg.
+struct JPEGReader : public ImageReader
+{
+	// Opens fileName and starts decompression; width, height and channels are set from the decoded image
+	JPEGReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );
+	~JPEGReader( void );
+	// Reads the next scanline into row and returns its (zero-based) index
+	unsigned int nextRow( unsigned char* row );
+	// Reads only the header of fileName to obtain the image dimensions and component count
+	static bool GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );
+protected:
+	FILE* _fp;                              // the open input file
+	struct jpeg_decompress_struct _cInfo;   // libjpeg decompression state
+	struct my_error_mgr _jErr;              // error manager (with jump buffer) attached to _cInfo
+	unsigned int _currentRow;               // number of scanlines read so far
+};
+
+// Row-by-row JPEG image writer built on libjpeg.
+struct JPEGWriter : public ImageWriter
+{
+	// Opens fileName for writing and starts compression of a width x height image with the given channel count and quality
+	JPEGWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , unsigned int quality=100 );
+	~JPEGWriter( void );
+	// Writes a single scanline and returns its (zero-based) index
+	unsigned int nextRow( const unsigned char* row );
+	// Writes rowNum consecutive scanlines stored contiguously in rows and returns the updated row count
+	unsigned int nextRows( const unsigned char* rows , unsigned int rowNum );
+protected:
+	FILE* _fp;                            // the open output file
+	struct jpeg_compress_struct _cInfo;   // libjpeg compression state
+	struct my_error_mgr _jErr;            // error manager attached to _cInfo
+	unsigned int _currentRow;             // number of scanlines written so far
+};
+
+#include "JPEG.inl"
+#endif //JPEG_INCLUDED
diff --git a/Src/JPEG.inl b/Src/JPEG.inl
new file mode 100644
index 0000000..e98b643
--- /dev/null
+++ b/Src/JPEG.inl
@@ -0,0 +1,130 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+
+inline METHODDEF( void )
+my_error_exit (j_common_ptr cinfo)
+{
+	// cinfo->err really points to a my_error_mgr struct, so coerce pointer
+	my_error_ptr myerr = (my_error_ptr) cinfo->err;
+
+	// Always display the message.
+	// We could postpone this until after returning, if we chose.
+	(*cinfo->err->output_message) (cinfo);
+
+	// Return control to the setjmp point
+	longjmp(myerr->setjmp_buffer, 1);
+}
+
+// Reads the header of the JPEG file and reports its dimensions and number of components.
+// Parameters:
+//   fileName: path of the JPEG file
+//   width, height, channels: set from the header's image_width, image_height and num_components
+// Returns true once the header has been read; failures are reported through ERROR_OUT.
+inline bool JPEGReader::GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{
+	FILE* fp = fopen( fileName , "rb" );
+	if( !fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+
+	struct jpeg_decompress_struct cInfo;
+	struct my_error_mgr jErr;
+
+	// Route libjpeg's fatal errors to my_error_exit, which jumps back to the setjmp() below
+	cInfo.err = jpeg_std_error( &jErr.pub );
+	jErr.pub.error_exit = my_error_exit;
+	if( setjmp( jErr.setjmp_buffer ) )
+	{
+		// Reached only via longjmp from my_error_exit
+		// NOTE(review): fp is not closed on this path -- harmless only if ERROR_OUT terminates; confirm
+		jpeg_destroy_decompress( &cInfo );
+		ERROR_OUT( "JPEG error occured" );
+	}
+
+	jpeg_create_decompress( &cInfo );
+	jpeg_stdio_src( &cInfo , fp );
+
+	// Only the header is parsed; decompression is never started
+	(void) jpeg_read_header( &cInfo , TRUE );
+
+	channels = cInfo.num_components;
+	width  = cInfo.image_width;
+	height = cInfo.image_height;
+	jpeg_destroy_decompress( &cInfo );
+
+	fclose( fp );
+	return true;
+}
+
+// Opens the JPEG file, reads its header and starts decompression so that scanlines can be pulled with nextRow().
+// Parameters:
+//   fileName: path of the JPEG file
+//   width, height, channels: set from the decompressor's output_width, output_height and output_components
+// Failures are reported through ERROR_OUT.
+inline JPEGReader::JPEGReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{
+	_currentRow = 0;
+	_fp = fopen( fileName , "rb" );
+	if( !_fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+
+	// Route libjpeg's fatal errors to my_error_exit, which jumps back to the setjmp() below
+	_cInfo.err = jpeg_std_error( &_jErr.pub );
+	_jErr.pub.error_exit = my_error_exit;
+	// NOTE(review): the jump buffer refers to this constructor's frame; a libjpeg error raised after the
+	// constructor has returned (e.g. from nextRow or the destructor) would jump to a frame that no longer exists
+	if( setjmp( _jErr.setjmp_buffer ) )
+	{
+		// Reached only via longjmp from my_error_exit
+		jpeg_destroy_decompress( &_cInfo );
+		ERROR_OUT( "JPEG error occured" );
+	}
+
+	jpeg_create_decompress( &_cInfo );
+	jpeg_stdio_src( &_cInfo , _fp );
+
+	(void) jpeg_read_header( &_cInfo , TRUE );
+	(void) jpeg_start_decompress( &_cInfo );
+
+	channels = _cInfo.output_components;
+	width  = _cInfo.output_width;
+	height = _cInfo.output_height;
+}
+// Releases the libjpeg decompressor and closes the input file.
+inline JPEGReader::~JPEGReader( void )
+{
+	// jpeg_finish_decompress() raises a libjpeg error when called before every scanline has been read.
+	// That error is routed to my_error_exit(), which longjmp()s to the context saved in the constructor --
+	// a stack frame that no longer exists by the time the destructor runs.
+	// So only "finish" a fully consumed image; a partially read one is simply destroyed.
+	if( _cInfo.output_scanline>=_cInfo.output_height ) (void) jpeg_finish_decompress( &_cInfo );
+	jpeg_destroy_decompress( &_cInfo );
+	fclose( _fp );
+}
+// Decodes the next scanline into the caller-provided buffer.
+// Returns the zero-based index of the scanline that was just read.
+inline unsigned int JPEGReader::nextRow( unsigned char* row )
+{
+	// libjpeg expects an array of row pointers; a single pointer serves as a one-element array
+	JSAMPROW scanline = row;
+	jpeg_read_scanlines( &_cInfo , &scanline , 1 );
+	const unsigned int rowIndex = _currentRow;
+	_currentRow = rowIndex + 1;
+	return rowIndex;
+}
+
+// Opens the output file and starts compression.
+// Parameters:
+//   fileName: path of the JPEG file to create
+//   width, height: dimensions of the image to be written
+//   channels: number of components per pixel (1 is written as grayscale, anything else as RGB)
+//   quality: quality setting passed to jpeg_set_quality
+// Failure to open the file is reported through ERROR_OUT.
+inline JPEGWriter::JPEGWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , unsigned int quality )
+{
+	_currentRow = 0;
+	_fp = fopen( fileName , "wb" );
+	if( !_fp ) ERROR_OUT( "Failed to open: %s" , fileName );
+
+	_cInfo.err = jpeg_std_error( &_jErr.pub );
+	jpeg_create_compress( &_cInfo );
+
+	jpeg_stdio_dest( &_cInfo , _fp );
+
+	_cInfo.image_width = width;
+	_cInfo.image_height = height;
+	_cInfo.input_components = channels;
+	// The color space has to agree with the component count: libjpeg rejects a one-component RGB image
+	_cInfo.in_color_space = channels==1 ? JCS_GRAYSCALE : JCS_RGB;		/* colorspace of input image */
+
+	// The defaults depend on in_color_space, so they must be set after it
+	jpeg_set_defaults( &_cInfo );
+	jpeg_set_quality( &_cInfo , quality , TRUE );
+
+	jpeg_start_compress( &_cInfo , TRUE );
+}
+// Completes the compression, releases the libjpeg compressor and closes the output file.
+inline JPEGWriter::~JPEGWriter( void )
+{
+	// The file must stay open until the compressor has finished writing to it
+	jpeg_finish_compress( &_cInfo );
+	jpeg_destroy_compress( &_cInfo );
+	fclose( _fp );
+}
+// Compresses a single scanline.
+// Returns the zero-based index of the scanline that was just written.
+inline unsigned int JPEGWriter::nextRow( const unsigned char* row )
+{
+	// libjpeg takes non-const row pointers, hence the cast
+	JSAMPROW scanline = ( unsigned char* )row;
+	(void) jpeg_write_scanlines( &_cInfo , &scanline , 1 );
+	const unsigned int rowIndex = _currentRow;
+	_currentRow = rowIndex + 1;
+	return rowIndex;
+}
+
+// Compresses rowNum consecutive scanlines.
+// Parameters:
+//   rows: the scanlines, stored contiguously (one after the other, without padding)
+//   rowNum: the number of scanlines in rows
+// Returns the total number of scanlines handed to the writer so far.
+inline unsigned int JPEGWriter::nextRows( const unsigned char* rows , unsigned int rowNum )
+{
+	// Bytes per scanline, derived from the component count the writer was configured with (not a hard-coded 3)
+	const size_t rowStride = (size_t)_cInfo.input_components * sizeof( unsigned char ) * _cInfo.image_width;
+	JSAMPROW* row_pointers = new JSAMPROW[ rowNum ];
+	for( unsigned int r=0 ; r<rowNum ; r++ ) row_pointers[r] = (unsigned char*)( rows + r * rowStride );
+	(void) jpeg_write_scanlines( &_cInfo , row_pointers , rowNum );
+	delete[] row_pointers;
+	return _currentRow += rowNum;
+}
+
diff --git a/Src/LinearSolvers.h b/Src/LinearSolvers.h
new file mode 100644
index 0000000..a68c94c
--- /dev/null
+++ b/Src/LinearSolvers.h
@@ -0,0 +1,570 @@
+#ifndef LINEAR_SOLVERS_INCLUDE
+#define LINEAR_SOLVERS_INCLUDE
+
+#ifdef USE_CHOLMOD
+#include <Cholmod/cholmod.h>
+#if defined( WIN32 ) || defined( _WIN64 )
+#pragma message( "[WARNING] Need to explicitly exclude VCOMP.lib" )
+#pragma comment( lib , "CHOLMOD_FULL.lib" )
+#endif // WIN32 || _WIN64
+#ifdef DLONG
+typedef long long SOLVER_LONG;
+#define CHOLMOD( name ) cholmod_l_ ## name
+#else // !DLONG
+typedef       int SOLVER_LONG;
+#define CHOLMOD( name ) cholmod_ ## name
+#endif // DLONG
+#elif defined(EIGEN_USE_MKL_ALL)
+#pragma comment( lib , "mkl_core.lib" )
+#pragma comment( lib , "mkl_intel_lp64.lib" )
+#pragma comment( lib , "mkl_intel_thread.lib" )
+#pragma comment( lib , "mkl_blas95_lp64.lib" )
+#pragma comment( lib , "libiomp5md.lib" )
+#endif // USE_CHOLMOD
+
+#include "SparseMatrixInterface.h"
+
+// Returns the sum of the squared entries of the dim-element array values.
+// The generic overload sums values[i].squareNorm() and so works for any Type providing that method.
+inline double                        SquareNorm( const double* values , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += values[i] * values[i] ; return norm2; }
+inline double                        SquareNorm( const  float* values , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += values[i] * values[i] ; return norm2; }
+template< class Type > inline double SquareNorm( const   Type* values , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += values[i].squareNorm()  ; return norm2 ; }
+
+// Returns the sum of the squared entry-wise differences of the dim-element arrays values1 and values2.
+// The generic overload sums ( values1[i] - values2[i] ).squareNorm() and so works for any Type providing subtraction and that method.
+inline double                        SquareDifference( const double* values1 , const double* values2 , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += ( values1[i] - values2[i] ) * ( values1[i] - values2[i] ) ; return norm2; }
+inline double                        SquareDifference( const  float* values1 , const  float* values2 , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += ( values1[i] - values2[i] ) * ( values1[i] - values2[i] ) ; return norm2; }
+template< class Type > inline double SquareDifference( const   Type* values1 , const   Type* values2 , int dim ){ double norm2 = 0 ; for( int i=0 ; i<dim ; i++ ) norm2 += ( values1[i] - values2[i] ).squareNorm()  ; return norm2 ; }
+
+
+// This is the conjugate gradients solver.
+// The assumption is that the class SPDOperator defines a method operator()( const Real* , Real* ) which corresponds to applying a symmetric positive-definite operator.
+// Reusable work buffers (r, d and q) for SolveCG, so that repeated solves of the same size do not re-allocate.
+template< class Real >
+struct CGScratch
+{
+	Real *r , *d , *q;
+	CGScratch( void ) : r(NULL) , d(NULL) , q(NULL) , _dim(0){ ; }
+	// _dim has to be initialized before resize() compares against it
+	CGScratch( int dim ) : r(NULL) , d(NULL) , q(NULL) , _dim(0){ resize(dim); }
+	~CGScratch( void ){ resize(0); }
+	// Re-allocates the three buffers to hold dim entries each (contents are not preserved); a no-op if the size is unchanged.
+	// Passing zero releases the buffers.
+	void resize( int dim )
+	{
+		if( dim!=_dim )
+		{
+			delete[] r ; r = NULL;
+			delete[] d ; d = NULL;
+			delete[] q ; q = NULL;
+			if( dim ) r = new Real[dim] , d = new Real[dim] , q = new Real[dim];
+			_dim = dim;
+		}
+	}
+protected:
+	int _dim;	// number of entries currently allocated in each buffer
+};
+// Work buffers for SolvePreconditionedCG: the three buffers of CGScratch plus the buffer s.
+template< class Real >
+struct PreconditionedCGScratch : public CGScratch< Real >
+{
+	Real *s;
+	PreconditionedCGScratch( void ) : CGScratch< Real >() , s(NULL){ ; }
+	// s has to be NULL before resize() inspects it, otherwise an indeterminate pointer would be deleted
+	PreconditionedCGScratch( int dim ) : CGScratch< Real >() , s(NULL){ resize(dim); }
+	~PreconditionedCGScratch( void ){ resize(0); }
+	// Re-allocates all four buffers to hold dim entries each (contents are not preserved); a no-op if the size is unchanged.
+	// Passing zero releases the buffers.
+	void resize( int dim )
+	{
+		// The size check must precede the base-class resize, which updates _dim
+		if( dim!=CGScratch< Real >::_dim )
+		{
+			delete[] s ; s = NULL;
+			if( dim ) s = new Real[dim];
+		}
+		CGScratch< Real >::resize( dim );
+	}
+};
+// Jacobi (diagonal) preconditioner: stores the reciprocals of a matrix's diagonal and applies them entry-wise.
+template< class Real >
+struct DiagonalPreconditioner
+{
+	Real* iDiagonal;	// reciprocals of the diagonal entries (NULL until set() is called with a non-empty matrix)
+	DiagonalPreconditioner( void ) : iDiagonal(NULL) , _dim(0){ ; }
+	~DiagonalPreconditioner( void ){ if( iDiagonal ) delete[] iDiagonal ; iDiagonal = NULL; }
+	// Computes the reciprocal of the diagonal of M, re-allocating the storage if the number of rows changed.
+	// Entries of a row whose column index equals the row index are summed before inverting.
+	// NOTE(review): a row without a (non-zero) diagonal entry yields a division by zero -- confirm callers guarantee one.
+	template< class MatrixRowIterator >
+	void set( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+	{
+		if( _dim!=M.rows() )
+		{
+			_dim = (int)M.rows();
+			if( iDiagonal ) delete[] iDiagonal , iDiagonal = NULL;
+			if( _dim>0 ) iDiagonal = new Real[_dim];
+		}
+		// Nothing to compute for an empty matrix (and memset must not be handed a NULL pointer)
+		if( !iDiagonal ) return;
+		memset( iDiagonal , 0 , sizeof(Real)*_dim );
+#pragma omp parallel for
+		for( int i=0 ; i<M.rows() ; i++ )
+		{
+			for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) if( iter->N==i ) iDiagonal[i] += iter->Value;
+			iDiagonal[i] = (Real)1./iDiagonal[i];
+		}
+	}
+	// Applies the preconditioner: out[i] = in[i] / diagonal[i] for each of the stored entries.
+	void operator()( const Real* in , Real* out ) const
+	{
+#pragma omp parallel for
+		for( int i=0 ; i<_dim ; i++ ) out[i] = in[i] * iDiagonal[i];
+	}
+protected:
+	int _dim;	// number of entries in iDiagonal
+};
+
+// Conjugate-gradients solver for L x = b.
+// Parameters:
+//   L:       operator invoked as L( in , out ); assumed to apply a symmetric positive-definite operator
+//   iters:   maximum number of iterations
+//   dim:     number of unknowns
+//   b:       right-hand side (dim entries)
+//   x:       on input the initial guess, on output the computed solution (dim entries)
+//   scratch: optional work buffers; if NULL, temporary buffers are allocated and released internally
+//            NOTE(review): the buffers are assumed to hold at least dim entries -- this is not checked
+//   eps:     tolerance; it is squared and compared against the squared residual norm
+//   threads: number of OpenMP threads used for the vector loops
+//   verbose: if true, prints the iteration count and the squared norms of b and of the final residual
+// Returns the number of iterations performed (zero if the initial residual is already below the tolerance).
+template< class Real , class SPDOperator >
+int SolveCG( SPDOperator& L , int iters , int dim , const Real* b , Real* x , CGScratch< Real >* scratch=NULL , double eps=1e-8 , int threads=1 , bool verbose=false )
+{
+	// Work with squared norms from here on
+	eps *= eps;
+	Real *r , *d , *q;
+	if( scratch ) r = scratch->r    , d = scratch->d    , q = scratch->q;
+	else          r = new Real[dim] , d = new Real[dim] , q = new Real[dim];
+	memset( r , 0 , sizeof(Real)*dim ) , memset( d , 0 , sizeof(Real)*dim ) , memset( q , 0 , sizeof(Real)*dim );
+	double delta_new = 0 , delta_0;
+
+	// Initial residual r = b - L x, which is also the first search direction d
+	L( x , r );
+#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
+	for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = b[i] - r[i] , delta_new += r[i] * r[i];
+
+	delta_0 = delta_new;
+	// Absolute test on the initial residual
+	if( delta_new<eps )
+	{
+		if( !scratch ) delete[] r , delete[] d , delete[] q;
+		return 0;
+	}
+
+	int ii;
+	// Iterate until the squared residual has dropped by the factor eps relative to its initial value
+	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
+	{
+		L( d , q );
+        double dDotQ = 0;
+#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
+		for( int i=0 ; i<dim ; i++ ) dDotQ += d[i] * q[i];
+		// NOTE(review): no guard against dDotQ being zero
+		Real alpha = Real( delta_new / dDotQ );
+
+		double delta_old = delta_new;
+		delta_new = 0;
+
+		// Every RESET_COUNT-th iteration the residual is recomputed from x instead of being updated incrementally
+		const int RESET_COUNT = 50;
+		if( (ii%RESET_COUNT)==(RESET_COUNT-1) )
+		{
+#pragma omp parallel for num_threads( threads )
+			for( int i=0 ; i<dim ; i++ ) x[i] += d[i] * alpha;
+			L( x , r );
+#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i] , delta_new += r[i] * r[i];
+		}
+		else
+#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] -= q[i] * alpha , delta_new += r[i] * r[i] , x[i] += d[i] * alpha;
+
+		// New search direction
+		Real beta = Real( delta_new / delta_old );
+#pragma omp parallel for num_threads( threads )
+		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + d[i] * beta;
+	}
+	if( verbose )
+	{
+		// Recompute the residual from the final x for reporting
+		L( x , r );
+#pragma omp parallel for num_threads( threads )
+		for( int i=0 ; i<dim ; i++ ) r[i] -= b[i];
+		printf( "CG: %d %g -> %g\n" , ii , SquareNorm( b , dim ) , SquareNorm( r , dim ) );
+	}
+	if( !scratch ) delete[] r , delete[] d , delete[] q;
+	return ii;
+}
+// Preconditioned conjugate-gradients solver for L x = b.
+// Parameters:
+//   L:        operator invoked as L( in , out ); assumed to apply a symmetric positive-definite operator
+//   Pinverse: preconditioner invoked as Pinverse( in , out )
+//   iters:    maximum number of iterations
+//   dim:      number of unknowns
+//   b:        right-hand side (dim entries)
+//   x:        on input the initial guess, on output the computed solution (dim entries)
+//   scratch:  optional work buffers; if NULL, temporary buffers are allocated and released internally
+//   eps:      tolerance; it is squared and compared against the preconditioned residual product r.P^{-1}r
+//   threads:  number of OpenMP threads used for the vector loops
+//   verbose:  if true, prints the iteration count and the squared norms of b and of the final residual
+// Returns the number of iterations performed (zero if the initial residual is already below the tolerance).
+template< class Real , class SPDOperator , class SPDPreconditioner >
+int SolvePreconditionedCG( SPDOperator& L , SPDPreconditioner& Pinverse , int iters , int dim , const Real* b , Real* x , PreconditionedCGScratch< Real >* scratch=NULL , double eps=1e-8 , int threads=1 , bool verbose=false )
+{
+	// Work with squared quantities from here on
+	eps *= eps;
+	Real *r , *d , *q , *s;
+	if( scratch ) r = scratch->r    , d = scratch->d    , q = scratch->q , s = scratch->s;
+	else          r = new Real[dim] , d = new Real[dim] , q = new Real[dim] , s = new Real[dim];
+	memset( r , 0 , sizeof(Real)*dim ) , memset( d , 0 , sizeof(Real)*dim ) , memset( q , 0 , sizeof(Real)*dim ) , memset( s , 0 , sizeof(Real)*dim );
+	double delta_new = 0 , delta_0;
+
+	// Initial residual r = b - L x and first search direction d = P^{-1} r
+	L( x , r );
+#pragma omp parallel for num_threads( threads )
+	for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i];
+	Pinverse( r , d );
+#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
+	for( int i=0 ; i<dim ; i++ ) delta_new += r[i] * d[i];
+
+	delta_0 = delta_new;
+	// Absolute test on the initial residual
+	if( delta_new<eps )
+	{
+		// All four buffers are owned locally when no scratch space was supplied
+		if( !scratch ) delete[] r , delete[] d , delete[] q , delete[] s;
+		return 0;
+	}
+	int ii;
+	// Iterate until the residual product has dropped by the factor eps relative to its initial value
+	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
+	{
+		L( d , q );
+		double dDotQ = 0;
+#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
+		for( int i=0 ; i<dim ; i++ ) dDotQ += d[i] * q[i];
+		Real alpha = Real( delta_new / dDotQ );
+
+		const int RESET_COUNT = 50;
+#pragma omp parallel for num_threads( threads )
+		for( int i=0 ; i<dim ; i++ ) x[i] += d[i] * alpha;
+		// Every RESET_COUNT-th iteration the residual is recomputed from x instead of being updated incrementally
+		if( (ii%RESET_COUNT)==(RESET_COUNT-1) )
+		{
+			L( x , r );
+#pragma omp parallel for num_threads( threads )
+			for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i];
+		}
+		else
+#pragma omp parallel for num_threads( threads )
+			for( int i=0 ; i<dim ; i++ ) r[i] -= q[i] * alpha;
+		Pinverse( r , s );
+
+		double delta_old = delta_new;
+		delta_new = 0;
+#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
+		for( int i=0 ; i<dim ; i++ ) delta_new += r[i] * s[i];
+
+		// New search direction
+		Real beta = Real( delta_new / delta_old );
+#pragma omp parallel for num_threads( threads )
+		for( int i=0 ; i<dim ; i++ ) d[i] = s[i] + d[i] * beta;
+	}
+	if( verbose )
+	{
+		// Recompute the residual from the final x for reporting
+		L( x , r );
+#pragma omp parallel for num_threads( threads )
+		for( int i=0 ; i<dim ; i++ ) r[i] -= b[i];
+		printf( "PCCG: %d %g -> %g\n" , ii , SquareNorm( b , dim ) , SquareNorm( r , dim ) );
+	}
+	if( !scratch ) delete[] r , delete[] d , delete[] q , delete[] s;
+	return ii;
+}
+
+
+#ifdef USE_EIGEN
+#define STORE_EIGEN_MATRIX
+#ifdef EIGEN_USE_MKL_ALL
+#include <Eigen/PardisoSupport>
+#else // !EIGEN_USE_MKL_ALL
+#include <Eigen/Sparse>
+#endif // EIGEN_USE_MKL_ALL
+
+// Common interface of the Eigen-backed solvers.
+template< class Real , class MatrixRowIterator >
+struct EigenSolver
+{
+	// Virtual so that a derived solver can be destroyed safely through a pointer to this interface
+	virtual ~EigenSolver( void ){ ; }
+	// Re-computes the solver for a matrix with new values
+	virtual void update( const SparseMatrixInterface< Real , MatrixRowIterator >& M ) = 0;
+	// Solves for x given the right-hand side b
+	virtual void solve( ConstPointer( Real ) b , Pointer( Real ) x ) = 0;
+	// Number of unknowns of the system
+	virtual size_t dimension( void ) const = 0;
+};
+
+// Sparse Cholesky (LL^t) solver backed by Eigen (or by Pardiso when EIGEN_USE_MKL_ALL is defined).
+// The factorization is always performed in double precision, independently of Real.
+// A failed factorization is fatal: a message is written to stderr and the process exits with a failure status.
+template< class Real , class MatrixRowIterator >
+class EigenSolverCholeskyLLt : public EigenSolver< Real , MatrixRowIterator >
+{
+#ifdef EIGEN_USE_MKL_ALL
+	typedef Eigen::PardisoLLT< Eigen::SparseMatrix< double > > Eigen_Solver;
+	typedef Eigen::VectorXd                                    Eigen_Vector;
+#else // !EIGEN_USE_MKL_ALL
+	typedef Eigen::SimplicialLLT< Eigen::SparseMatrix< double > > Eigen_Solver;
+	typedef Eigen::VectorXd                                       Eigen_Vector;
+#endif // EIGEN_USE_MKL_ALL
+	Eigen_Solver _solver;
+	Eigen_Vector _eigenB;
+#ifdef STORE_EIGEN_MATRIX
+	// Kept so that update() can overwrite the values in place instead of rebuilding the matrix
+	Eigen::SparseMatrix< double > _eigenM;
+#endif // STORE_EIGEN_MATRIX
+public:
+	// Converts M to an Eigen matrix and analyzes its sparsity pattern.
+	// Unless analyzeOnly is set, the matrix is also factorized.
+	EigenSolverCholeskyLLt( const SparseMatrixInterface< Real , MatrixRowIterator >& M , bool analyzeOnly=false )
+	{
+#ifdef STORE_EIGEN_MATRIX
+		_eigenM.resize( int( M.rows() ) , int( M.rows() ) );
+#else // !STORE_EIGEN_MATRIX
+		Eigen::SparseMatrix< double > eigenM( int( M.rows() ) , int( M.rows() ) );
+#endif // STORE_EIGEN_MATRIX
+		std::vector< Eigen::Triplet< double > > triplets;
+		triplets.reserve( M.entries() );
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) triplets.push_back( Eigen::Triplet< double >( i , iter->N , iter->Value ) );
+#ifdef STORE_EIGEN_MATRIX
+		_eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.analyzePattern( _eigenM );
+#else // !STORE_EIGEN_MATRIX
+		eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.analyzePattern( eigenM );
+#endif // STORE_EIGEN_MATRIX
+		if( !analyzeOnly )
+		{
+#ifdef STORE_EIGEN_MATRIX
+			_solver.factorize( _eigenM );
+#else // !STORE_EIGEN_MATRIX
+			_solver.factorize( eigenM );
+#endif // STORE_EIGEN_MATRIX
+			if( _solver.info()!=Eigen::Success ) fprintf( stderr , "[ERROR] EigenSolverCholeskyLLt::EigenSolverCholeskyLLt Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+		}
+		_eigenB.resize( M.rows() );
+	}
+	// Re-factorizes using the values of M.
+	// NOTE(review): with STORE_EIGEN_MATRIX the values are written in parallel through coeffRef, which presumably
+	// requires M to have the same sparsity pattern as the matrix given to the constructor -- confirm.
+	void update( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+	{
+#ifdef STORE_EIGEN_MATRIX
+#pragma omp parallel for
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) _eigenM.coeffRef( i , iter->N ) = iter->Value;
+		_solver.factorize( _eigenM );
+#else // !STORE_EIGEN_MATRIX
+		Eigen::SparseMatrix< double > eigenM( int( M.rows() ) , int( M.rows() ) );
+		std::vector< Eigen::Triplet< double > > triplets;
+		triplets.reserve( M.entries() );
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) triplets.push_back( Eigen::Triplet< double >( i , iter->N , iter->Value ) );
+		eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.factorize( eigenM );
+#endif // STORE_EIGEN_MATRIX
+		// Every failure case terminates the process, so there is no fall-through between the cases
+		switch( _solver.info() )
+		{
+		case Eigen::Success: break;
+		case Eigen::NumericalIssue: fprintf( stderr , "[ERROR] EigenSolverCholeskyLLt::update Failed to factorize matrix (numerical issue)\n" ) , exit( EXIT_FAILURE );
+		case Eigen::NoConvergence:  fprintf( stderr , "[ERROR] EigenSolverCholeskyLLt::update Failed to factorize matrix (no convergence)\n" ) , exit( EXIT_FAILURE );
+		case Eigen::InvalidInput:   fprintf( stderr , "[ERROR] EigenSolverCholeskyLLt::update Failed to factorize matrix (invalid input)\n" ) , exit( EXIT_FAILURE );
+		default: fprintf( stderr , "[ERROR] EigenSolverCholeskyLLt::update Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+		}
+	}
+	// Solves for x given the right-hand side b, using the current factorization
+	void solve( ConstPointer( Real ) b , Pointer( Real ) x )
+	{
+#pragma omp parallel for
+		for( int i=0 ; i<_eigenB.size() ; i++ ) _eigenB[i] = b[i];
+		Eigen_Vector eigenX = _solver.solve( _eigenB );
+#pragma omp parallel for
+		for( int i=0 ; i<eigenX.size() ; i++ ) x[i] = (Real)eigenX[i];
+	}
+	size_t dimension( void ) const { return _eigenB.size(); }
+	// Convenience one-shot solve: factorizes M and solves for x
+	static void Solve( const SparseMatrixInterface< Real , MatrixRowIterator >& M , ConstPointer( Real ) b , Pointer( Real ) x ){ EigenSolverCholeskyLLt solver( M ) ; solver.solve( b , x ); }
+};
+// Sparse Cholesky (LDL^t) solver backed by Eigen (or by Pardiso when EIGEN_USE_MKL_ALL is defined).
+// The factorization is always performed in double precision, independently of Real.
+// A failed factorization is fatal: a message is written to stderr and the process exits with a failure status.
+template< class Real , class MatrixRowIterator >
+class EigenSolverCholeskyLDLt : public EigenSolver< Real , MatrixRowIterator >
+{
+#ifdef EIGEN_USE_MKL_ALL
+	typedef Eigen::PardisoLDLT< Eigen::SparseMatrix< double > > Eigen_Solver;
+	typedef Eigen::VectorXd                                     Eigen_Vector;
+#else // !EIGEN_USE_MKL_ALL
+	typedef Eigen::SimplicialLDLT< Eigen::SparseMatrix< double > > Eigen_Solver;
+	typedef Eigen::VectorXd                                        Eigen_Vector;
+#endif // EIGEN_USE_MKL_ALL
+	Eigen_Solver _solver;
+	Eigen_Vector _eigenB;
+public:
+	// Converts M to an Eigen matrix and analyzes its sparsity pattern.
+	// Unless analyzeOnly is set, the matrix is also factorized.
+	EigenSolverCholeskyLDLt( const SparseMatrixInterface< Real , MatrixRowIterator >& M , bool analyzeOnly=false )
+	{
+		Eigen::SparseMatrix< double > eigenM( int( M.rows() ) , int( M.rows() ) );
+		std::vector< Eigen::Triplet<double> > triplets;
+		triplets.reserve( M.entries() );
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) triplets.push_back( Eigen::Triplet< double >( i , iter->N , iter->Value ) );
+		eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.analyzePattern( eigenM );
+		if( !analyzeOnly )
+		{
+			_solver.factorize( eigenM );
+			if( _solver.info()!=Eigen::Success ) fprintf( stderr , "[ERROR] EigenSolverCholeskyLDLt::EigenSolverCholeskyLDLt Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+		}
+		_eigenB.resize( M.rows() );
+	}
+	// Rebuilds the Eigen matrix from M and re-factorizes it
+	void update( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+	{
+		Eigen::SparseMatrix< double > eigenM( int( M.rows() ) , int( M.rows() ) );
+		std::vector< Eigen::Triplet<double> > triplets;
+		triplets.reserve( M.entries() );
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) triplets.push_back( Eigen::Triplet< double >( i , iter->N , iter->Value ) );
+		eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.factorize( eigenM );
+		if( _solver.info()!=Eigen::Success ) fprintf( stderr , "[ERROR] EigenSolverCholeskyLDLt::update Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+	}
+	// Solves for x given the right-hand side b, using the current factorization
+	void solve( ConstPointer( Real ) b , Pointer( Real ) x )
+	{
+#pragma omp parallel for
+		for( int i=0 ; i<_eigenB.size() ; i++ ) _eigenB[i] = b[i];
+		Eigen_Vector eigenX = _solver.solve( _eigenB );
+#pragma omp parallel for
+		for( int i=0 ; i<eigenX.size() ; i++ ) x[i] = (Real)eigenX[i];
+	}
+	size_t dimension( void ) const { return _eigenB.size(); }
+	// Convenience one-shot solve: factorizes M and solves for x
+	static void Solve( const SparseMatrixInterface< Real , MatrixRowIterator >& M , ConstPointer( Real ) b , Pointer( Real ) x ){ EigenSolverCholeskyLDLt solver( M ) ; solver.solve( b , x ); }
+};
+// Iterative conjugate-gradients solver backed by Eigen.
+// The solve is always performed in double precision, independently of Real.
+// A failed set-up is fatal: a message is written to stderr and the process exits with a failure status.
+template< class Real , class MatrixRowIterator >
+class EigenSolverCG : public EigenSolver< Real , MatrixRowIterator >
+{
+#if 1
+//	Eigen::ConjugateGradient< Eigen::SparseMatrix< double > , Eigen::Lower , Eigen::IncompleteLUT< double > > _solver;
+	Eigen::ConjugateGradient< Eigen::SparseMatrix< double > > _solver;
+#else
+	Eigen::BiCGSTAB< Eigen::SparseMatrix< double > > _solver;
+#endif
+	Eigen::VectorXd _eigenB , _eigenX;
+	// Kept so that update() can overwrite the values in place instead of rebuilding the matrix
+	Eigen::SparseMatrix< double > _eigenM;
+public:
+	// Converts M to an Eigen matrix and sets up the solver with the given iteration limit and tolerance
+	EigenSolverCG( const SparseMatrixInterface< Real , MatrixRowIterator >& M , int iters=20 , double tolerance=0. )
+	{
+		_eigenM.resize( (int)M.rows() , (int)M.rows() );
+		std::vector< Eigen::Triplet< double > > triplets;
+		triplets.reserve( M.entries() );
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) triplets.push_back( Eigen::Triplet< double >( i , iter->N , iter->Value ) );
+		_eigenM.setFromTriplets( triplets.begin() , triplets.end() );
+		_solver.compute( _eigenM );
+		_solver.analyzePattern( _eigenM );
+		if( _solver.info()!=Eigen::Success ) fprintf( stderr , "[ERROR] EigenSolverCG::EigenSolverCG Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+		_eigenB.resize( M.rows() ) , _eigenX.resize( M.rows() );
+		_solver.setMaxIterations( iters );
+		_solver.setTolerance( tolerance );
+	}
+	// Overwrites the stored matrix values with those of M and re-initializes the solver.
+	// NOTE(review): the values are written in parallel through coeffRef, which presumably requires M to have
+	// the same sparsity pattern as the matrix given to the constructor -- confirm.
+	void update( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+	{
+#pragma omp parallel for
+		for( int i=0 ; i<M.rows() ; i++ ) for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) _eigenM.coeffRef( i , iter->N ) = iter->Value;
+		_solver.compute( _eigenM );
+		_solver.analyzePattern( _eigenM );
+		if( _solver.info()!=Eigen::Success ) fprintf( stderr , "[ERROR] EigenSolverCG::update Failed to factorize matrix\n" ) , exit( EXIT_FAILURE );
+	}
+
+	void setIters( int iters ){ _solver.setMaxIterations( iters ); }
+	// Solves for x given the right-hand side b; the incoming contents of x are used as the initial guess
+	void solve( ConstPointer( Real ) b , Pointer( Real ) x )
+	{
+#pragma omp parallel for
+		for( int i=0 ; i<_eigenB.size() ; i++ ) _eigenB[i] = b[i] , _eigenX[i] = x[i];
+		_eigenX = _solver.solveWithGuess( _eigenB , _eigenX );
+#pragma omp parallel for
+		for( int i=0 ; i<_eigenX.size() ; i++ ) x[i] = _eigenX[i];
+	}
+	size_t dimension( void ) const { return _eigenB.size(); }
+	// Convenience one-shot solve: sets up the solver for M and runs at most iters iterations
+	static void Solve( const SparseMatrixInterface< Real , MatrixRowIterator >& M , const Real* b , Real* x , int iters ){ EigenSolverCG solver( M , iters ) ; solver.solve( b , x ); }
+};
+
+#endif // USE_EIGEN
+
+#ifdef USE_CHOLMOD
+class CholmodSolver // Sparse direct solver built on CHOLMOD: _init() sets up the pattern and analysis, update() factorizes values, solve() back-substitutes
+{
+	const static bool LOWER_TRIANGULAR = true;	// If true, only entries whose column index is >= the row index are handed to CHOLMOD
+	int dim;	// Number of rows of the system, set by _init()
+	cholmod_factor* cholmod_L;	// Factor created by CHOLMOD(analyze) and filled in by CHOLMOD(factorize)
+	cholmod_dense*  cholmod_b;	// Reusable double-precision buffer for the right-hand side
+	cholmod_sparse* cholmod_M;	// CHOLMOD copy of the system matrix
+	std::vector< bool > flaggedValues;	// NOTE(review): not referenced by any member defined in this part of the header
+	template< class Real , class MatrixRowIterator > void _init( const SparseMatrixInterface< Real , MatrixRowIterator >& M );
+public:
+	static cholmod_common cholmod_C;	// CHOLMOD workspace shared by all solver instances
+	static bool cholmod_C_set;	// True once CHOLMOD(start) has been called on cholmod_C
+
+	template< class Real , class MatrixRowIterator >
+	CholmodSolver( const SparseMatrixInterface< Real , MatrixRowIterator >& M , bool analyzeOnly=false );	// Sets up the pattern and, unless analyzeOnly is true, also factorizes M
+	~CholmodSolver( void );
+
+	template< class Real > void solve( ConstPointer( Real ) b , Pointer( Real ) x );
+	template< class Real , class MatrixRowIterator > bool update( const SparseMatrixInterface< Real , MatrixRowIterator >& M );	// Returns false (after printing a warning) if the factorization did not succeed
+	int nonZeros( void ) const;
+
+};
+bool CholmodSolver::cholmod_C_set = false;	// NOTE(review): non-inline static definitions in a header; including it from several translation units would presumably produce duplicate symbols -- confirm
+cholmod_common CholmodSolver::cholmod_C;
+
+template< class Real , class MatrixRowIterator > CholmodSolver::CholmodSolver( const SparseMatrixInterface< Real , MatrixRowIterator >& M , bool analyzeOnly ){ _init( M ) ; if( !analyzeOnly ) update( M ); } // Builds the pattern, then factorizes unless analyzeOnly; the bool returned by update() is discarded here
+template< class Real , class MatrixRowIterator > // Builds the CHOLMOD sparsity pattern of M and runs the symbolic analysis; numerical values are filled in later by update()
+void CholmodSolver::_init( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+{
+	{
+		if( !cholmod_C_set ) CHOLMOD(start)( &cholmod_C );	// Start the shared CHOLMOD workspace on first use
+		cholmod_C_set = true;
+	}
+	dim = (int)M.rows();
+
+	int maxEntries;
+	if( LOWER_TRIANGULAR )
+	{
+		maxEntries = (int)( ( M.entries()-M.rows() ) / 2 + M.rows() );	// Presumably sized for a symmetric pattern with at most one diagonal entry per row -- TODO confirm
+		cholmod_M = CHOLMOD(allocate_sparse)( dim , dim , maxEntries , 0 , 1 , -1 , CHOLMOD_REAL , &cholmod_C );
+	}
+	else
+	{
+		maxEntries = (int)M.entries();
+		cholmod_M = CHOLMOD(allocate_sparse)( dim , dim , maxEntries , 0 , 1 ,  0 , CHOLMOD_REAL , &cholmod_C );
+	}
+	if( !cholmod_M ) fprintf( stderr , "[ERROR] CholmodSolver::_init Failed to allocate sparse matrix\n" ) , exit( EXIT_FAILURE );
+	// CHOLMOD(allocate_sparse) has already allocated the index and value arrays for maxEntries entries, so they are used as-is (replacing them with malloc'ed buffers leaked the originals)
+
+	SOLVER_LONG *_p = (SOLVER_LONG*)cholmod_M->p;
+	SOLVER_LONG *_i = (SOLVER_LONG*)cholmod_M->i;
+
+	int off = 0;
+	dim = 0;	// Re-counted below, one increment per row written
+
+	for( int i=0 ; i<M.rows() ; i++ )
+	{
+		_p[dim++] = off;	// Offset of the first stored entry of row i
+		for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ ) if( !LOWER_TRIANGULAR || iter->N>=i ) _i[off++] = iter->N;
+	}
+	_p[dim] = off;	// Closing offset: total number of stored entries
+
+	cholmod_L = CHOLMOD(analyze)( cholmod_M , &cholmod_C );
+	cholmod_b = CHOLMOD(allocate_dense)( dim , 1 , dim , cholmod_M->xtype , &cholmod_C );
+}
+template< class Real , class MatrixRowIterator > // Copies the values of M into the pattern built by _init() and factorizes; returns false (with a warning on stderr) on failure
+bool CholmodSolver::update( const SparseMatrixInterface< Real , MatrixRowIterator >& M )
+{
+	double *_x = (double*)cholmod_M->x;
+	// Row i writes its values starting at _p[i], the offset recorded by _init(), so no running offset is shared between rows
+
+	SOLVER_LONG *_p = (SOLVER_LONG*)cholmod_M->p;
+#pragma omp parallel for
+	for( int i=0 ; i<M.rows() ; i++ )
+	{
+		int off = (int)_p[i];
+		for( MatrixRowIterator iter=M.begin(i) ; iter!=M.end(i) ; iter++ )if( !LOWER_TRIANGULAR || iter->N>=i ) _x[off++] = double( iter->Value );	// Same filter as _init(), so values line up with the stored indices (assumes M has the pattern passed to _init -- TODO confirm)
+	}
+
+	cholmod_C.print = 0;
+	CHOLMOD(factorize)( cholmod_M , cholmod_L , &cholmod_C );
+	if( cholmod_C.status==CHOLMOD_NOT_POSDEF )
+	{
+		fprintf( stderr , "[WARNING] CholmodSolver::update: Matrix not positive-definite\n" );
+		return false;
+	}
+	else if( cholmod_C.status==CHOLMOD_OUT_OF_MEMORY )
+	{
+		fprintf( stderr , "[WARNING] CholmodSolver::update: CHOLMOD ran out of memory\n" );
+		return false;
+	}
+	else if( cholmod_C.status!=CHOLMOD_OK )
+	{
+		fprintf( stderr , "[WARNING] CholmodSolver::update: CHOLMOD status not OK: %d\n" , cholmod_C.status );
+		return false;
+	}
+	return true;
+}
+inline CholmodSolver::~CholmodSolver( void ) // Releases the factor, the right-hand-side buffer and the matrix; inline because this non-template definition lives in a header
+{
+	if( cholmod_L ) CHOLMOD(free_factor)( &cholmod_L , &cholmod_C ) , cholmod_L = NULL;
+	if( cholmod_b ) CHOLMOD(free_dense )( &cholmod_b , &cholmod_C ) , cholmod_b = NULL;
+	if( cholmod_M ) CHOLMOD(free_sparse)( &cholmod_M , &cholmod_C ) , cholmod_M = NULL;
+}
+
+template< class Real > // Solves the factorized system for right-hand side b and writes the result to x; x is left untouched if CHOLMOD returns no solution
+void CholmodSolver::solve( ConstPointer( Real ) b , Pointer( Real ) x )
+{
+	double* _b = (double*)cholmod_b->x;
+	for( int i=0 ; i<dim ; i++ ) _b[i] = (double)b[i];
+
+	cholmod_dense* cholmod_x = CHOLMOD(solve)( CHOLMOD_A , cholmod_L , cholmod_b , &cholmod_C );
+	if( !cholmod_x ){ fprintf( stderr , "[WARNING] CholmodSolver::solve: CHOLMOD failed to compute a solution\n" ) ; return; }
+	double* _x = (double*)cholmod_x->x;
+	for( int i=0 ; i<dim ; i++ ) x[i] = (Real)_x[i];
+	CHOLMOD(free_dense)( &cholmod_x , &cholmod_C );	// The solution vector is allocated per call and released here
+}
+inline int CholmodSolver::nonZeros( void ) const // Counts the entries stored in the factor cholmod_L (result truncated to int); inline because it is defined in a header
+{
+	long long nz = 0;
+	// Non-supernodal factor carrying values: sum the per-column entry counts
+	if( cholmod_L->xtype != CHOLMOD_PATTERN && !(cholmod_L->is_super ) ) for( size_t i=0 ; i<cholmod_L->n ; i++ ) nz += ((SOLVER_LONG*)cholmod_L->nz)[i];
+	// The supernodal arrays (s, super, pi) are only dereferenced when the factor is supernodal
+	bool examine_super = false;
+	if( cholmod_L->is_super ) examine_super = ( cholmod_L->xtype != CHOLMOD_PATTERN ) || ( ((SOLVER_LONG*)cholmod_L->s)[0] != (-1) );
+	if( examine_super )
+	{
+		const SOLVER_LONG *superStart = (const SOLVER_LONG*)cholmod_L->super;	// Read with the same index type as the other factor arrays in this file
+		const SOLVER_LONG *rowStart   = (const SOLVER_LONG*)cholmod_L->pi;
+		for( size_t s=0 ; s<cholmod_L->nsuper ; s++ )
+		{
+			// Column and row counts of supernode s, taken as differences of consecutive super / pi entries
+			long long nscol = (long long)( superStart[s+1] - superStart[s] );
+			long long nsrow = (long long)( rowStart[s+1] - rowStart[s] );
+			// Accumulate in 64 bits so the product cannot overflow int
+			nz += nscol * nsrow - (nscol*nscol - nscol)/2 ;
+		}
+	}
+	return (int)nz;
+}
+#endif // USE_CHOLMOD
+#endif // LINEAR_SOLVERS_INCLUDE
\ No newline at end of file
diff --git a/Src/MAT.h b/Src/MAT.h
index c090450..019067a 100644
--- a/Src/MAT.h
+++ b/Src/MAT.h
@@ -28,20 +28,35 @@ DAMAGE.
 #ifndef MAT_INCLUDED
 #define MAT_INCLUDED
 #include "Geometry.h"
+#include "Array.h"
 
-template <class Real>
-class MinimalAreaTriangulation
+template< class Real , unsigned int Dim > // Forward declaration so the class below can name this function as a friend
+std::vector< TriangleIndex > MinimalAreaTriangulation( ConstPointer( Point< Real , Dim > ) vertices , size_t vCount );
+
+template< class Real , unsigned int Dim > // Helper holding the tables used to triangulate one polygon; every member is private, so only the friend function can use it
+class _MinimalAreaTriangulation
 {
-	Real* bestTriangulation;
-	int* midPoint;
-	Real GetArea(const size_t& i,const size_t& j,const std::vector<Point3D<Real> >& vertices);
-	void GetTriangulation(const size_t& i,const size_t& j,const std::vector<Point3D<Real> >& vertices,std::vector<TriangleIndex>& triangles);
-public:
-	MinimalAreaTriangulation(void);
-	~MinimalAreaTriangulation(void);
-	Real GetArea(const std::vector<Point3D<Real> >& vertices);
-	void GetTriangulation(const std::vector<Point3D<Real> >& vertices,std::vector<TriangleIndex>& triangles);
+	Pointer( Real ) _bestTriangulation;	// _vCount x _vCount table of sub-polygon areas; -1 marks an entry that has not been computed
+	Pointer( int ) _midpoint;	// _vCount x _vCount table of the vertex chosen to split each sub-polygon; -1 marks an unset entry
+	size_t _vCount;	// Number of polygon vertices
+	ConstPointer( Point< Real , Dim > ) _vertices;	// The polygon vertices (not owned: never freed by this class)
+
+	void _set( void );
+	Real _subPolygonArea( size_t i , size_t j );
+	void _addTriangles( size_t i , size_t j , std::vector< TriangleIndex >& triangles ) const;
+	size_t _subPolygonIndex( size_t i , size_t j ) const;
+
+	_MinimalAreaTriangulation( ConstPointer( Point< Real , Dim > ) vertices , size_t vCount );
+	~_MinimalAreaTriangulation( void );
+	std::vector< TriangleIndex > getTriangulation( void );
+	friend std::vector< TriangleIndex > MinimalAreaTriangulation< Real , Dim >( ConstPointer( Point< Real , Dim > ) vertices , size_t vCount );
 };
+template< class Real , unsigned int Dim > // Public entry point: triangulates the polygon given by vCount vertices and returns the triangles as vertex-index triples
+std::vector< TriangleIndex > MinimalAreaTriangulation( ConstPointer( Point< Real , Dim > ) vertices , size_t vCount )
+{
+	_MinimalAreaTriangulation< Real , Dim > MAT( vertices , vCount );	// Tables live only for the duration of this call
+	return MAT.getTriangulation();
+}
 
 #include "MAT.inl"
 
diff --git a/Src/MAT.inl b/Src/MAT.inl
index 5106659..0c0e337 100644
--- a/Src/MAT.inl
+++ b/Src/MAT.inl
@@ -25,42 +25,39 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 DAMAGE.
 */
+
 //////////////////////////////
 // MinimalAreaTriangulation //
 //////////////////////////////
-template <class Real>
-MinimalAreaTriangulation<Real>::MinimalAreaTriangulation(void)
+template< class Real , unsigned int Dim > // Records the polygon; the tables start out null and are allocated by _set(). Initializers are listed in member declaration order.
+_MinimalAreaTriangulation< Real , Dim >::_MinimalAreaTriangulation( ConstPointer( Point< Real , Dim > ) vertices , size_t vCount ) : _vCount( vCount ) , _vertices( vertices )
 {
-	bestTriangulation=NULL;
-	midPoint=NULL;
+	_bestTriangulation = NullPointer( Real );
+	_midpoint = NullPointer( int );
 }
-template <class Real>
-MinimalAreaTriangulation<Real>::~MinimalAreaTriangulation(void)
+template< class Real , unsigned int Dim > // Releases both tables; the vertex array is not freed here
+_MinimalAreaTriangulation< Real , Dim >::~_MinimalAreaTriangulation( void )
 {
-	if(bestTriangulation)
-		delete[] bestTriangulation;
-	bestTriangulation=NULL;
-	if(midPoint)
-		delete[] midPoint;
-	midPoint=NULL;
+	FreePointer( _bestTriangulation );
+	FreePointer( _midpoint );
 }
-template <class Real>
-void MinimalAreaTriangulation<Real>::GetTriangulation(const std::vector<Point3D<Real> >& vertices,std::vector<TriangleIndex>& triangles)
+template< class Real , unsigned int Dim >
+std::vector< TriangleIndex > _MinimalAreaTriangulation< Real , Dim >::getTriangulation( void )
 {
-	if(vertices.size()==3)
+	std::vector< TriangleIndex > triangles;
+	if( _vCount==3 )
 	{
 		triangles.resize(1);
-		triangles[0].idx[0]=0;
-		triangles[0].idx[1]=1;
-		triangles[0].idx[2]=2;
-		return;
+		triangles[0].idx[0] = 0;
+		triangles[0].idx[1] = 1;
+		triangles[0].idx[2] = 2;
+		return triangles;
 	}
-	else if(vertices.size()==4)
+	else if( _vCount==4 )
 	{
 		TriangleIndex tIndex[2][2];
-		Real area[2];
+		Real area[] = { 0 , 0 };
 
-		area[0]=area[1]=0;
 		triangles.resize(2);
 
 		tIndex[0][0].idx[0]=0;
@@ -77,141 +74,97 @@ void MinimalAreaTriangulation<Real>::GetTriangulation(const std::vector<Point3D<
 		tIndex[1][1].idx[1]=1;
 		tIndex[1][1].idx[2]=2;
 
-		Point3D<Real> n,p1,p2;
-		for(int i=0;i<2;i++)
-			for(int j=0;j<2;j++)
-			{
-				p1=vertices[tIndex[i][j].idx[1]]-vertices[tIndex[i][j].idx[0]];
-				p2=vertices[tIndex[i][j].idx[2]]-vertices[tIndex[i][j].idx[0]];
-				CrossProduct(p1,p2,n);
-				area[i] += Real( Length(n) );
-			}
-		if(area[0]>area[1])
-		{
-			triangles[0]=tIndex[1][0];
-			triangles[1]=tIndex[1][1];
-		}
-		else
-		{
-			triangles[0]=tIndex[0][0];
-			triangles[1]=tIndex[0][1];
-		}
-		return;
+		Real sqArea;	// Squared area of the triangle currently being measured
+		for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) sqArea = SquareArea( _vertices[ tIndex[i][j].idx[0] ] , _vertices[ tIndex[i][j].idx[1] ] , _vertices[ tIndex[i][j].idx[2] ] ) , area[i] += sqArea<0 ? 0 : (Real)sqrt( sqArea );	// Sum the areas of BOTH triangles of diagonal choice i (assigning instead of accumulating kept only the last one)
+		if( area[0]>area[1] ) triangles[0] = tIndex[1][0] , triangles[1] = tIndex[1][1];	// Keep whichever diagonal gives the smaller total area
+		else                  triangles[0] = tIndex[0][0] , triangles[1] = tIndex[0][1];
+		return triangles;
 	}
-	if(bestTriangulation)
-		delete[] bestTriangulation;
-	if(midPoint)
-		delete[] midPoint;
-	bestTriangulation=NULL;
-	midPoint=NULL;
-	size_t eCount=vertices.size();
-	bestTriangulation=new Real[eCount*eCount];
-	midPoint=new int[eCount*eCount];
-	for(size_t i=0;i<eCount*eCount;i++)
-		bestTriangulation[i]=-1;
-	memset(midPoint,-1,sizeof(int)*eCount*eCount);
-	GetArea(0,1,vertices);
-	triangles.clear();
-	GetTriangulation(0,1,vertices,triangles);
+	_set();
+	_addTriangles( 1 , 0 , triangles );
+	return triangles;
 }
-template <class Real>
-Real MinimalAreaTriangulation<Real>::GetArea(const std::vector<Point3D<Real> >& vertices)
+template< class Real , unsigned int Dim > // (Re)allocates both _vCount x _vCount tables, marks every entry as unset (-1), and solves for the whole polygon
+void _MinimalAreaTriangulation< Real , Dim >::_set( void )
 {
-	if(bestTriangulation)
-		delete[] bestTriangulation;
-	if(midPoint)
-		delete[] midPoint;
-	bestTriangulation=NULL;
-	midPoint=NULL;
-	int eCount=vertices.size();
-	bestTriangulation=new double[eCount*eCount];
-	midPoint=new int[eCount*eCount];
-	for(int i=0;i<eCount*eCount;i++)
-		bestTriangulation[i]=-1;
-	memset(midPoint,-1,sizeof(int)*eCount*eCount);
-	return GetArea(0,1,vertices);
+	FreePointer( _bestTriangulation );
+	FreePointer( _midpoint );
+	_bestTriangulation = AllocPointer< Real >( _vCount * _vCount );
+	_midpoint = AllocPointer< int >( _vCount * _vCount );
+	for( size_t i=0 ; i<_vCount*_vCount ; i++ ) _bestTriangulation[i] = -1 , _midpoint[i] = -1;	// size_t index: the bound is a size_t product
+	_subPolygonArea( 1 , 0 );	// Fills the tables for the full polygon; the returned area is not needed here
 }
-template<class Real>
-void MinimalAreaTriangulation<Real>::GetTriangulation(const size_t& i,const size_t& j,const std::vector<Point3D<Real> >& vertices,std::vector<TriangleIndex>& triangles)
+
+template< class Real , unsigned int Dim > size_t _MinimalAreaTriangulation< Real , Dim >::_subPolygonIndex( size_t i , size_t j ) const { return i*_vCount+j; } // Flat index of entry (i,j) in the _vCount x _vCount tables
+
+template< class Real , unsigned int Dim > // Appends the triangles of sub-polygon (i,j) by following the split vertices stored in _midpoint
+void _MinimalAreaTriangulation< Real , Dim >::_addTriangles( size_t i , size_t j , std::vector< TriangleIndex >& triangles ) const
 {
 	TriangleIndex tIndex;
-	size_t eCount=vertices.size();
-#ifdef BRUNO_LEVY_FIX
-	int ii=(int)i;
-	if( i<j ) ii += (int)eCount;
-#else // !BRUNO_LEVY_FIX
-	size_t ii=i;
-	if( i<j ) ii += eCount;
-#endif // BRUNO_LEVY_FIX
-	if( j+1>=ii )
-		return;
-	ii=midPoint[i*eCount+j];
-	if( ii>=0 )
+	if( j<i ) j += _vCount;	// Unwrap j so that it is not smaller than i
+	if( i==j || i+1==j ) return;	// Fewer than three vertices: nothing to add
+	int mid = _midpoint[ _subPolygonIndex( i , j%_vCount ) ];	// Table lookups always use the wrapped index
+	if( mid>=0 )
 	{
 		tIndex.idx[0] = int( i );
-		tIndex.idx[1] = int( j );
-		tIndex.idx[2] = int( ii );
-		triangles.push_back(tIndex);
-		GetTriangulation(i,ii,vertices,triangles);
-		GetTriangulation(ii,j,vertices,triangles);
+		tIndex.idx[1] = int( mid );
+		tIndex.idx[2] = int( j%_vCount );
+		triangles.push_back( tIndex );
+		_addTriangles( i , mid , triangles );	// Recurse on the two sub-polygons on either side of the split vertex
+		_addTriangles( mid , j , triangles );
 	}
 }
 
-template<class Real>
-Real MinimalAreaTriangulation<Real>::GetArea(const size_t& i,const size_t& j,const std::vector<Point3D<Real> >& vertices)
+// Get the minimal area of the sub-polygon [ v_i , ... , v_j ], caching the area in _bestTriangulation and the chosen split vertex in _midpoint
+template< class Real , unsigned int Dim >
+Real _MinimalAreaTriangulation< Real , Dim >::_subPolygonArea( size_t i , size_t j )
 {
-	Real a=FLT_MAX,temp;
-	size_t eCount=vertices.size();
-	size_t idx=i*eCount+j;
-	size_t ii=i;
-	if(i<j)
-		ii+=eCount;
-	if(j+1>=ii)
+	size_t idx = _subPolygonIndex( i , j );	// Computed from j before it is unwrapped below
+	if( _midpoint[idx]!=-1 ) return _bestTriangulation[idx];	// A stored split vertex means this entry was already solved
+	Real a = FLT_MAX , temp;
+	if( j<i ) j += _vCount;
+	// If either i==j or i+1==j, the polygon has trivial area
+	if( i==j || i+1==j )
 	{
-		bestTriangulation[idx]=0;
+		_bestTriangulation[idx] = 0;
 		return 0;
 	}
-	if(midPoint[idx]!=-1)
-		return bestTriangulation[idx];
+	// If we have already computed the minimal area for this edge
+	if( _midpoint[idx]!=-1 ) return _bestTriangulation[idx];	// NOTE(review): same test as at the top of the function; idx has not changed in between
 	int mid=-1;
-	for(size_t r=j+1;r<ii;r++)
+
+	// For each vertex r \in( i , j ):
+	// -- Construct the triangle ( j , r , i )
+	// -- Compute the Area(j,r,i) + Area( j , ... , r ) + Area( r , ... , i )
+	for( size_t r=i+1 ; r<j ; r++ )
 	{
-		size_t rr=r%eCount;
-		size_t idx1=i*eCount+rr,idx2=rr*eCount+j;
-		Point3D<Real> p,p1,p2;
-		p1=vertices[i]-vertices[rr];
-		p2=vertices[j]-vertices[rr];
-		CrossProduct(p1,p2,p);
-		temp = Real( Length(p) );
-		if(bestTriangulation[idx1]>=0)
+		size_t idx1 = _subPolygonIndex( i , r%_vCount ); // SubPolygon( r , ... , i )
+		size_t idx2 = _subPolygonIndex( r%_vCount , j%_vCount ); // SubPolygon( j , ... , r );
+
+		temp = SquareArea( _vertices[i] , _vertices[r%_vCount] , _vertices[j%_vCount] );
+		temp = temp<0 ? 0 : (Real)sqrt(temp);	// Clamp negative values to zero before taking the square root
+		// If we have already computed Area( r , ... , i ), use that.
+		if( _bestTriangulation[idx1]>=0 )
 		{
-			temp+=bestTriangulation[idx1];
-			if(temp>a)
-				continue;
-			if(bestTriangulation[idx2]>0)
-				temp+=bestTriangulation[idx2];
-			else
-				temp+=GetArea(rr,j,vertices);
+			temp += _bestTriangulation[idx1];
+			// If the partial area is already too large, terminate
+			if( temp>a ) continue; // Terminate early
+			// Otherwise, compute the total area
+			temp += _subPolygonArea( r%_vCount , j%_vCount );
 		}
 		else
 		{
-			if(bestTriangulation[idx2]>=0)
-				temp+=bestTriangulation[idx2];
-			else
-				temp+=GetArea(rr,j,vertices);
-			if(temp>a)
-				continue;
-			temp+=GetArea(i,rr,vertices);
+			// Otherwise, compute it now
+			temp += _subPolygonArea( r%_vCount , j%_vCount );
+			// If the partial area is already too large, terminate
+			if( temp>a ) continue;
+			// Otherwise, compute the total area
+			temp += _subPolygonArea( i , r%_vCount );
 		}
 
-		if(temp<a)
-		{
-			a=temp;
-			mid=int(rr);
-		}
+		if( temp<a ) a=temp , mid=(int)(r%_vCount);	// Remember the best split vertex seen so far
 	}
-	bestTriangulation[idx]=a;
-	midPoint[idx]=mid;
-
+	_bestTriangulation[idx] = a;
+	_midpoint[idx] = mid;
 	return a;
 }
diff --git a/Src/MarchingCubes.cpp b/Src/MarchingCubes.cpp
deleted file mode 100644
index 4973835..0000000
--- a/Src/MarchingCubes.cpp
+++ /dev/null
@@ -1,1025 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-#include <math.h>
-#include "MarchingCubes.h"
-
-////////////
-// Square //
-////////////
-int Square::AntipodalCornerIndex(int idx){
-	int x,y;
-	FactorCornerIndex(idx,x,y);
-	return CornerIndex( (x+1)%2 , (y+1)%2 );
-}
-int Square::CornerIndex( int x , int y ){ return (y<<1)|x; }
-void Square::FactorCornerIndex( int idx , int& x , int& y ){ x=(idx>>0)&1 , y=(idx>>1)&1; }
-int Square::EdgeIndex( int orientation , int i )
-{
-	switch( orientation )
-	{
-		case 0: // x
-			if( !i ) return  0; // (0,0) -> (1,0)
-			else     return  2; // (0,1) -> (1,1)
-		case 1: // y
-			if( !i ) return  3; // (0,0) -> (0,1)
-			else     return  1; // (1,0) -> (1,1)
-	};
-	return -1;
-}
-void Square::FactorEdgeIndex(int idx,int& orientation,int& i){
-	switch(idx){
-		case 0: case 2:
-			orientation=0;
-			i=idx/2;
-			return;
-		case 1: case 3:
-			orientation=1;
-			i=((idx/2)+1)%2;
-			return;
-	};
-}
-void Square::EdgeCorners(int idx,int& c1,int& c2){
-	int orientation,i;
-	FactorEdgeIndex(idx,orientation,i);
-	switch(orientation){
-		case 0:
-			c1 = CornerIndex(0,i);
-			c2 = CornerIndex(1,i);
-			break;
-		case 1:
-			c1 = CornerIndex(i,0);
-			c2 = CornerIndex(i,1);
-			break;
-	};
-}
-int Square::ReflectEdgeIndex(int idx,int edgeIndex){
-	int orientation=edgeIndex%2;
-	int o,i;
-	FactorEdgeIndex(idx,o,i);
-	if(o!=orientation){return idx;}
-	else{return EdgeIndex(o,(i+1)%2);}
-}
-int Square::ReflectCornerIndex(int idx,int edgeIndex){
-	int orientation=edgeIndex%2;
-	int x,y;
-	FactorCornerIndex(idx,x,y);
-	switch(orientation){
-		case 0:	return CornerIndex((x+1)%2,y);
-		case 1:	return CornerIndex(x,(y+1)%2);
-	};
-	return -1;
-}
-
-
-
-//////////
-// Cube //
-//////////
-int Cube::CornerIndex( int x , int y , int z ){ return (z<<2)|(y<<1)|x; }
-void Cube::FactorCornerIndex( int idx , int& x , int& y , int& z ){ x = (idx>>0)&1 , y = (idx>>1)&1 , z = (idx>>2)&1; }
-int Cube::EdgeIndex(int orientation,int i,int j){return (i | (j<<1))|(orientation<<2);}
-void Cube::FactorEdgeIndex( int idx , int& orientation , int& i , int &j )
-{
-	orientation=idx>>2;
-	i = (idx&1);
-	j = (idx&2)>>1;
-}
-int Cube::FaceIndex( int x , int y , int z )
-{
-	if     ( x<0 ) return  0;
-	else if( x>0 ) return  1;
-	else if( y<0 ) return  2;
-	else if( y>0 ) return  3;
-	else if( z<0 ) return  4;
-	else if( z>0 ) return  5;
-	else           return -1;
-}
-int Cube::FaceIndex( int dir , int offSet ){ return (dir<<1)|offSet; }
-
-void Cube::FactorFaceIndex( int idx , int& x , int& y , int& z )
-{
-	x=y=z=0;
-	switch( idx )
-	{
-		case 0: x=-1; break;
-		case 1: x= 1; break;
-		case 2: y=-1; break;
-		case 3: y= 1; break;
-		case 4: z=-1; break;
-		case 5: z= 1; break;
-	};
-}
-void Cube::FactorFaceIndex( int idx , int& dir , int& offSet )
-{
-	dir  = idx>>1;
-	offSet=idx &1;
-}
-bool Cube::IsEdgeCorner( int cIndex , int e )
-{
-	int o , i , j;
-	FactorEdgeIndex( e , o , i , j );
-	switch( o )
-	{
-	case 0: return (cIndex && 2)==(i<<1) && (cIndex && 4)==(j<<2);
-	case 1: return (cIndex && 1)==(i<<0) && (cIndex && 4)==(j<<2);
-	case 2: return (cIndex && 4)==(i<<2) && (cIndex && 2)==(j<<1);
-	default: return false;
-	}
-}
-bool Cube::IsFaceCorner( int cIndex , int f )
-{
-	int dir , off;
-	FactorFaceIndex( f , dir , off );
-	return ( cIndex & (1<<dir) )==(off<<dir);
-}
-
-int Cube::FaceAdjacentToEdges( int eIndex1 , int eIndex2 )
-{
-	int f1,f2,g1,g2;
-	FacesAdjacentToEdge(eIndex1,f1,f2);
-	FacesAdjacentToEdge(eIndex2,g1,g2);
-	if(f1==g1 || f1==g2){return f1;}
-	if(f2==g1 || f2==g2){return f2;}
-	return -1;
-}
-
-void Cube::FacesAdjacentToEdge( int eIndex , int& f1Index , int& f2Index )
-{
-	int orientation,i1,i2;
-	FactorEdgeIndex(eIndex,orientation,i1,i2);
-	i1<<=1;
-	i2<<=1;
-	i1--;
-	i2--;
-	switch(orientation){
-		case 0:
-			f1Index=FaceIndex( 0,i1, 0);
-			f2Index=FaceIndex( 0, 0,i2);
-			break;
-		case 1:
-			f1Index=FaceIndex(i1, 0, 0);
-			f2Index=FaceIndex( 0, 0,i2);
-			break;
-		case 2:
-			f1Index=FaceIndex(i1, 0, 0);
-			f2Index=FaceIndex( 0,i2, 0);
-			break;
-	};
-}
-void Cube::EdgeCorners( int idx , int& c1 , int& c2 )
-{
-	int orientation,i1,i2;
-	FactorEdgeIndex(idx,orientation,i1,i2);
-	switch(orientation){
-		case 0:
-			c1=CornerIndex(0,i1,i2);
-			c2=CornerIndex(1,i1,i2);
-			break;
-		case 1:
-			c1=CornerIndex(i1,0,i2);
-			c2=CornerIndex(i1,1,i2);
-			break;
-		case 2:
-			c1=CornerIndex(i1,i2,0);
-			c2=CornerIndex(i1,i2,1);
-			break;
-	};
-}
-void Cube::FaceCorners( int idx , int& c1 , int& c2 , int& c3 , int& c4 )
-{
-	int i=idx%2;
-	switch(idx/2){
-	case 0:
-		c1=CornerIndex(i,0,0);
-		c2=CornerIndex(i,1,0);
-		c3=CornerIndex(i,0,1);
-		c4=CornerIndex(i,1,1);
-		return;
-	case 1:
-		c1=CornerIndex(0,i,0);
-		c2=CornerIndex(1,i,0);
-		c3=CornerIndex(0,i,1);
-		c4=CornerIndex(1,i,1);
-		return;
-	case 2:
-		c1=CornerIndex(0,0,i);
-		c2=CornerIndex(1,0,i);
-		c3=CornerIndex(0,1,i);
-		c4=CornerIndex(1,1,i);
-		return;
-	}
-}
-int Cube::AntipodalCornerIndex( int idx )
-{
-	int x,y,z;
-	FactorCornerIndex(idx,x,y,z);
-	return CornerIndex((x+1)%2,(y+1)%2,(z+1)%2);
-}
-int Cube::FaceReflectFaceIndex( int idx , int faceIndex )
-{
-	if(idx/2!=faceIndex/2) return idx;
-	else
-	{
-		if( idx%2 ) return idx-1;
-		else        return idx+1;
-	}
-}
-int Cube::FaceReflectEdgeIndex( int idx , int faceIndex )
-{
-	int orientation=faceIndex/2;
-	int o,i,j;
-	FactorEdgeIndex(idx,o,i,j);
-	if(o==orientation){return idx;}
-	switch(orientation){
-		case 0:	return EdgeIndex(o,(i+1)%2,j);
-		case 1:
-			switch(o){
-				case 0:	return EdgeIndex(o,(i+1)%2,j);
-				case 2:	return EdgeIndex(o,i,(j+1)%2);
-			};
-		case 2:	return EdgeIndex(o,i,(j+1)%2);
-	};
-	return -1;
-}
-int Cube::FaceReflectCornerIndex( int idx , int faceIndex )
-{
-	int orientation=faceIndex/2;
-	int x,y,z;
-	FactorCornerIndex(idx,x,y,z);
-	switch(orientation){
-		case 0:	return CornerIndex((x+1)%2,y,z);
-		case 1:	return CornerIndex(x,(y+1)%2,z);
-		case 2: return CornerIndex(x,y,(z+1)%2);
-	};
-	return -1;
-}
-int Cube::EdgeReflectCornerIndex( int idx , int edgeIndex )
-{
-	int orientation , x , y , z;
-	FactorEdgeIndex( edgeIndex , orientation , x , y );
-	FactorCornerIndex( idx , x , y , z );
-	switch(orientation)
-	{
-		case 0:	return CornerIndex(  x      , (y+1)%2 , (z+1)%2 );
-		case 1:	return CornerIndex( (x+1)%2 ,  y      , (z+1)%2 );
-		case 2:	return CornerIndex( (x+1)%2 , (y+1)%2 ,  z      );
-	};
-	return -1;
-}
-int	Cube::EdgeReflectEdgeIndex( int edgeIndex )
-{
-	int o , i1 , i2;
-	FactorEdgeIndex( edgeIndex , o , i1 , i2 );
-	return Cube::EdgeIndex( o , (i1+1)%2 , (i2+1)%2 );
-}
-
-
-/////////////////////
-// MarchingSquares //
-/////////////////////
-#if NEW_ORDERING
-#pragma message ( "[WARNING] Not clear if MarchingSquares::edgeMask and MarchingSquares::edges are set correctly" )
-const int MarchingSquares::cornerMap[] = { 0 , 1 , 3 , 2 };
-bool MarchingSquares::HasEdgeRoots( unsigned char mcIndex , int edgeIndex )
-{
-	int c1 , c2;
-	Square::EdgeCorners( edgeIndex , c1 , c2 );
-	return !( 
-		( ( mcIndex&(1<<MarchingSquares::cornerMap[c1]) ) &&  ( mcIndex&(1<<MarchingSquares::cornerMap[c2])) )
-		||
-		(!( mcIndex&(1<<MarchingSquares::cornerMap[c1]) ) && !( mcIndex&(1<<MarchingSquares::cornerMap[c2])) )
-		);
-}
-#endif // NEW_ORDERING
-/*
-0} // (0,0) -> (1,0)
-1} // (1,0) -> (1,1)
-2} // (0,1) -> (1,1)
-3} // (0,0) -> (0,1)
-*/
-const int MarchingSquares::edgeMask[1<<Square::CORNERS]=
-{
-	    0, //  0 ->         ->                         ->
-	    9, //  1 -> 0       -> (0,0)                   -> 0,3     ->  9 
-	    3, //  2 -> 1       -> (1,0)                   -> 0,1     ->  3
-	   10, //  3 -> 0,1     -> (0,0) (1,0)             -> 1,3     -> 10
-	   12, //  4 -> 2       -> (0,1)                   -> 2,3     -> 12
-	    5, //  5 -> 0,2     -> (0,0) (0,1)             -> 0,2     ->  5
-	   15, //  6 -> 1,2     -> (1,0) (0,1)             -> 0,1,2,3 -> 15
-	    6, //  7 -> 0,1,2   -> (0,0) (1,0) (0,1)       -> 1,2     ->  6
-	    6, //  8 -> 3       -> (1,1)                   -> 1,2     ->  6
-	   15, //  9 -> 0,3     -> (0,0) (1,1)             -> 0,1,2,3 -> 15 
-	    5, // 10 -> 1,3     -> (1,0) (1,1)             -> 0,2     ->  5
-	   12, // 11 -> 0,1,3   -> (0,0) (1,0) (1,1)       -> 2,3     -> 12
-	   10, // 12 -> 2,3     -> (0,1) (1,1)             -> 1,3     -> 10
-	    3, // 13 -> 0,2,3   -> (0,0) (0,1) (1,1)       -> 0,1     ->  3
-	    9, // 14 -> 1,2,3   -> (1,0) (0,1) (1,1)       -> 0,3     ->  9
-	    0, // 15 -> 0,1,2,3 -> (0,0) (1,0) (0,1) (1,1) -> 
-};
-#if NEW_ORDERING
-/*
-0} // (0,0) -> (1,0)
-1} // (1,0) -> (1,1)
-2} // (0,1) -> (1,1)
-3} // (0,0) -> (0,1)
-*/
-const int MarchingSquares::edges[1<<Square::CORNERS][MAX_EDGES*2+1] =
-{
-	// Positive to the right
-	/////////////////////////////////// (0,0) (1,0) (1,1) (0,1)
-	{ -1 ,  -1 ,  -1 ,  -1 ,  -1 } , //   -     -     -     -
-	{  3 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     -     -
-	{  0 ,   1 ,  -1 ,  -1 ,  -1 } , //   -     +     -     -
-	{  3 ,   1 ,  -1 ,  -1 ,  -1 } , //   +     +     -     -
-	{  1 ,   2 ,  -1 ,  -1 ,  -1 } , //   -     -     +     -
-	{  3 ,   2 ,   1 ,   0 ,  -1 } , //   +     -     +     -
-	{  0 ,   2 ,  -1 ,  -1 ,  -1 } , //   -     +     +     -
-	{  3 ,   2 ,  -1 ,  -1 ,  -1 } , //   +     +     +     -
-	// Now flip the edges
-	{  2 ,   3 ,  -1 ,  -1 ,  -1 } , //   -     -     -     +
-	{  2 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     -     +
-	{  2 ,   3 ,   0 ,   1 ,  -1 } , //   -     +     -     +
-	{  2 ,   1 ,  -1 ,  -1 ,  -1 } , //   +     +     -     +
-	{  1 ,   3 ,  -1 ,  -1 ,  -1 } , //   -     -     +     +
-	{  1 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     +     +
-	{  0 ,   3 ,  -1 ,  -1 ,  -1 } , //   -     +     +     +
-	{ -1 ,  -1 ,  -1 ,  -1 ,  -1 } , //   +     +     +     +
-};
-#else // !NEW_ORDERING
-const int MarchingSquares::edges[1<<Square::CORNERS][MAX_EDGES*2+1] = {
-	{ -1,  -1,  -1,  -1,  -1}, //
-	{  3,   0,  -1,  -1,  -1}, // (0,0)
-	{  0,   1,  -1,  -1,  -1}, // (1,0)
-	{  3,   1,  -1,  -1,  -1}, // (0,0) (1,0)
-	{  2,   3,  -1,  -1,  -1}, // (0,1)
-	{  2,   0,  -1,  -1,  -1}, // (0,0) (0,1)
-	{  0,   1,   2,   3,  -1}, // (1,0) (0,1)
-	{  1,   2,  -1,  -1,  -1}, // (0,0) (1,0) (0,1)
-	{  2,   1,  -1,  -1,  -1}, // (1,1)
-	{  3,   0,   1,   2,  -1}, // (0,0) (1,1)
-	{  0,   2,  -1,  -1,  -1}, // (1,0) (1,1)
-	{  3,   2,  -1,  -1,  -1}, // (0,0) (1,0) (1,1)
-	{  1,   3,  -1,  -1,  -1}, // (0,1) (1,1)
-	{  1,   0,  -1,  -1,  -1}, // (0,0) (0,1) (1,1)
-	{  0,   3,  -1,  -1,  -1}, // (1,0) (0,1) (1,1)
-	{ -1,  -1,  -1,  -1,  -1}, // (0,0) (1,0) (0,1) (1,1)
-};
-#endif // NEW_ORDERING
-
-double MarchingSquares::vertexList[Square::EDGES][2];
-
-unsigned char MarchingSquares::GetIndex( const double v[Square::CORNERS] , double iso )
-{
-	unsigned char idx=0;
-#if NEW_ORDERING
-	if( v[ Square::CornerIndex(0,0) ]<iso ) idx |=   1;
-	if( v[ Square::CornerIndex(1,0) ]<iso ) idx |=   2;
-	if( v[ Square::CornerIndex(1,1) ]<iso ) idx |=   4;
-	if( v[ Square::CornerIndex(0,1) ]<iso ) idx |=   8;
-#else // !NEW_ORDERING
-	for( int i=0 ; i<Square::CORNERS ; i++ ) if( v[i]<iso ) idx|=(1<<i);
-#endif // NEW_ORDERING
-	return idx;
-}
-unsigned char MarchingSquares::GetIndex( const float v[Square::CORNERS] , float iso )
-{
-	unsigned char idx=0;
-#if NEW_ORDERING
-	if( v[ Square::CornerIndex(0,0) ]<iso ) idx |=   1;
-	if( v[ Square::CornerIndex(1,0) ]<iso ) idx |=   2;
-	if( v[ Square::CornerIndex(1,1) ]<iso ) idx |=   4;
-	if( v[ Square::CornerIndex(0,1) ]<iso ) idx |=   8;
-#else // !NEW_ORDERING
-	for( int i=0 ; i<Square::CORNERS ; i++ ) if( v[i]<iso ) idx|=(1<<i);
-#endif // NEW_ORDERING
-	return idx;
-}
-
-bool MarchingSquares::IsAmbiguous( const double v[Square::CORNERS] , double isoValue ){ return IsAmbiguous( GetIndex( v , isoValue ) ); }
-bool MarchingSquares::HasRoots( unsigned char idx ){ return !(idx==0 || idx==15); }
-#if NEW_ORDERING
-bool MarchingSquares::IsAmbiguous( unsigned char idx ){ return (idx==5) || (idx==10); }
-#else // !NEW_ORDERING
-bool MarchingSquares::IsAmbiguous( unsigned char idx ){ return (idx==6) || (idx==9); }
-#endif // NEW_ORDERING
-int MarchingSquares::AddEdges(const double v[Square::CORNERS],double iso,Edge* isoEdges)
-{
-	unsigned char idx;
-	int nEdges=0;
-	Edge e;
-
-	idx=GetIndex(v,iso);
-
-	/* Cube is entirely in/out of the surface */
-	if (!edgeMask[idx]) return 0;
-
-	/* Find the vertices where the surface intersects the cube */
-	int i,j,ii=1;
-	for(i=0;i<12;i++){
-		if(edgeMask[idx] & ii){SetVertex(i,v,iso);}
-		ii<<=1;
-	}
-	/* Create the triangle */
-	for (i=0;edges[idx][i]!=-1;i+=2) {
-		for(j=0;j<2;j++){
-			e.p[0][j]=vertexList[edges[idx][i+0]][j];
-			e.p[1][j]=vertexList[edges[idx][i+1]][j];
-		}
-		isoEdges[nEdges++]=e;
-	}
-	return nEdges;
-}
-
-int MarchingSquares::AddEdgeIndices( unsigned char mcIndex , int* isoIndices )
-{
-	int nEdges = 0;
-	/* Square is entirely in/out of the surface */
-	if( !edgeMask[mcIndex] ) return 0;
-
-	/* Create the edges */
-	for( int i=0 ; edges[mcIndex][i]!=-1 ; i+=2 )
-	{
-		for( int j=0 ; j<2 ; j++ ) isoIndices[i+j] = edges[mcIndex][i+j];
-		nEdges++;
-	}
-	return nEdges;
-}
-int MarchingSquares::AddEdgeIndices( const double v[Square::CORNERS] , double iso , int* isoIndices ){ return AddEdgeIndices( GetIndex( v , iso ) , isoIndices ); }
-void MarchingSquares::SetVertex(int e,const double values[Square::CORNERS],double iso){
-	int o,i,c1,c2;
-	Square::FactorEdgeIndex(e,o,i);
-	Square::EdgeCorners(e,c1,c2);
-	switch(o){
-		case 0:
-			vertexList[e][0]=Interpolate(values[c1]-iso,values[c2]-iso);
-			vertexList[e][1]=i;
-			break;
-		case 1:
-			vertexList[e][1]=Interpolate(values[c1]-iso,values[c2]-iso);
-			vertexList[e][0]=i;
-			break;
-	}
-}
-double MarchingSquares::Interpolate(double v1,double v2){return v1/(v1-v2);}
-
-
-///////////////////
-// MarchingCubes //
-///////////////////
-const int MarchingCubes::edgeMask[1<<Cube::CORNERS]={
-	    0,  273,  545,  816, 2082, 2355, 2563, 2834,
-	 1042, 1283, 1587, 1826, 3120, 3361, 3601, 3840,
-	  324,   85,  869,  628, 2406, 2167, 2887, 2646,
-	 1366, 1095, 1911, 1638, 3444, 3173, 3925, 3652,
-	  644,  917,  165,  436, 2726, 2999, 2183, 2454,
-	 1686, 1927, 1207, 1446, 3764, 4005, 3221, 3460,
-	  960,  721,  481,  240, 3042, 2803, 2499, 2258,
-	 2002, 1731, 1523, 1250, 4080, 3809, 3537, 3264,
-	 2184, 2457, 2729, 3000,  170,  443,  651,  922,
-	 3226, 3467, 3771, 4010, 1208, 1449, 1689, 1928,
-	 2508, 2269, 3053, 2812,  494,  255,  975,  734,
-	 3550, 3279, 4095, 3822, 1532, 1261, 2013, 1740,
-	 2572, 2845, 2093, 2364,  558,  831,   15,  286,
-	 3614, 3855, 3135, 3374, 1596, 1837, 1053, 1292,
-	 2888, 2649, 2409, 2168,  874,  635,  331,   90,
-	 3930, 3659, 3451, 3178, 1912, 1641, 1369, 1096,
-	 1096, 1369, 1641, 1912, 3178, 3451, 3659, 3930,
-	   90,  331,  635,  874, 2168, 2409, 2649, 2888,
-	 1292, 1053, 1837, 1596, 3374, 3135, 3855, 3614,
-	  286,   15,  831,  558, 2364, 2093, 2845, 2572,
-	 1740, 2013, 1261, 1532, 3822, 4095, 3279, 3550,
-	  734,  975,  255,  494, 2812, 3053, 2269, 2508,
-	 1928, 1689, 1449, 1208, 4010, 3771, 3467, 3226,
-	  922,  651,  443,  170, 3000, 2729, 2457, 2184,
-	 3264, 3537, 3809, 4080, 1250, 1523, 1731, 2002,
-	 2258, 2499, 2803, 3042,  240,  481,  721,  960,
-	 3460, 3221, 4005, 3764, 1446, 1207, 1927, 1686,
-	 2454, 2183, 2999, 2726,  436,  165,  917,  644,
-	 3652, 3925, 3173, 3444, 1638, 1911, 1095, 1366,
-	 2646, 2887, 2167, 2406,  628,  869,   85,  324,
-	 3840, 3601, 3361, 3120, 1826, 1587, 1283, 1042,
-	 2834, 2563, 2355, 2082,  816,  545,  273,    0
-};
-const int MarchingCubes::triangles[1<<Cube::CORNERS][MAX_TRIANGLES*3+1] = {
-	{  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,   5,   8,   5,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   5,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,   1,   5,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,  11,   1,   9,   1,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,  11,   8,  11,   1,   8,   1,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   1,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   0,  10,   0,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   4,   1,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   9,  10,   9,   5,  10,   5,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,  10,   4,  11,   4,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,  10,   8,  11,   8,   0,  11,   0,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,  11,  10,   9,  10,   4,   9,   4,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,  11,   8,  11,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   6,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,   2,   0,   4,   6,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,   2,   8,   5,   0,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   4,   6,   9,   5,   6,   2,   9,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   5,  11,   8,   6,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   5,  11,   6,   2,   0,   4,   6,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,   2,   8,   9,  11,   1,   9,   1,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,  11,   2,   2,  11,   1,   2,   1,   6,   6,   1,   4,  -1,  -1,  -1,  -1},
-	{   1,  10,   4,   2,   8,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   0,   1,   6,   2,   1,  10,   6,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   4,   1,  10,   8,   6,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   2,   9,   5,   6,   2,   5,   1,   6,   1,  10,   6,  -1,  -1,  -1,  -1},
-	{   2,   8,   6,   4,   5,  11,   4,  11,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   2,   0,   6,   2,   5,  11,   6,   5,  10,   6,  11,  -1,  -1,  -1,  -1},
-	{   9,  11,  10,   9,  10,   4,   9,   4,   0,   8,   6,   2,  -1,  -1,  -1,  -1},
-	{   9,  11,   2,   2,  11,   6,  10,   6,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   2,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   9,   2,   4,   8,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   2,   7,   0,   7,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   5,   4,   2,   7,   4,   8,   2,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   9,   2,   5,  11,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   5,  11,   0,   4,   8,   9,   2,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   0,   2,   1,   2,   7,   1,   7,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   7,  11,   1,   2,   7,   1,   4,   2,   4,   8,   2,  -1,  -1,  -1,  -1},
-	{   4,   1,  10,   9,   2,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   9,   2,   0,   1,  10,   0,  10,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   1,  10,   2,   7,   5,   0,   2,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,  10,   8,   1,  10,   2,   7,   1,   2,   5,   1,   7,  -1,  -1,  -1,  -1},
-	{   7,   9,   2,  10,   4,   5,  11,  10,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,  10,   8,  11,   8,   0,  11,   0,   5,   9,   2,   7,  -1,  -1,  -1,  -1},
-	{  11,  10,   7,   7,  10,   4,   7,   4,   2,   2,   4,   0,  -1,  -1,  -1,  -1},
-	{  11,  10,   7,   7,  10,   2,   8,   2,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   9,   8,   6,   7,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   6,   7,   0,   4,   7,   9,   0,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,   7,   5,   8,   6,   5,   0,   8,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   6,   7,   5,   4,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,  11,   1,   8,   6,   7,   9,   8,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   6,   7,   0,   4,   7,   9,   0,   7,  11,   1,   5,  -1,  -1,  -1,  -1},
-	{   8,   1,   0,  11,   1,   8,   6,  11,   8,   7,  11,   6,  -1,  -1,  -1,  -1},
-	{  11,   6,   7,   1,   6,  11,   6,   1,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,  10,   4,   6,   7,   9,   6,   9,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   1,   9,   9,   1,  10,   9,  10,   7,   7,  10,   6,  -1,  -1,  -1,  -1},
-	{   6,   7,   5,   8,   6,   5,   0,   8,   5,   1,  10,   4,  -1,  -1,  -1,  -1},
-	{   1,   7,   5,  10,   7,   1,   7,  10,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,  10,   4,  11,   4,   5,   7,   9,   8,   6,   7,   8,  -1,  -1,  -1,  -1},
-	{   0,   6,   9,   9,   6,   7,   6,   0,   5,   5,  11,  10,   5,  10,   6,  -1},
-	{   8,   7,   0,   6,   7,   8,   4,   0,   7,  11,  10,   4,   7,  11,   4,  -1},
-	{  11,  10,   6,  11,   6,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,  11,   7,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   5,   0,  11,   7,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,   4,   8,   9,   5,   4,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   1,   5,   3,   5,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,   7,   3,   1,   5,   7,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   1,   0,   3,   0,   9,   3,   9,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   8,   9,   4,   8,   7,   3,   4,   7,   1,   4,   3,  -1,  -1,  -1,  -1},
-	{   1,  10,   4,   3,  11,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,  11,   7,   8,   0,   1,  10,   8,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   1,  10,   5,   0,   9,  11,   7,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   9,  10,   9,   5,  10,   5,   1,  11,   7,   3,  -1,  -1,  -1,  -1},
-	{   4,   5,   7,   4,   7,   3,   4,   3,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   3,   3,   8,   0,   3,   0,   7,   7,   0,   5,  -1,  -1,  -1,  -1},
-	{   4,   3,  10,   4,   7,   3,   4,   0,   7,   0,   9,   7,  -1,  -1,  -1,  -1},
-	{  10,   8,   3,   3,   8,   7,   9,   7,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,   8,   6,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,   2,   0,   4,   2,   4,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,   8,   6,   2,   5,   0,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   4,   6,   9,   5,   6,   2,   9,   6,   3,  11,   7,  -1,  -1,  -1,  -1},
-	{   8,   6,   2,   3,   1,   5,   3,   5,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   1,   5,   3,   5,   7,   6,   2,   0,   4,   6,   0,  -1,  -1,  -1,  -1},
-	{   3,   1,   0,   3,   0,   9,   3,   9,   7,   2,   8,   6,  -1,  -1,  -1,  -1},
-	{   9,   4,   2,   2,   4,   6,   4,   9,   7,   7,   3,   1,   7,   1,   4,  -1},
-	{   8,   6,   2,  11,   7,   3,   4,   1,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   0,   1,   6,   2,   1,  10,   6,   1,  11,   7,   3,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   4,   1,  10,   8,   6,   2,  11,   7,   3,  -1,  -1,  -1,  -1},
-	{  11,   7,   3,   5,   2,   9,   5,   6,   2,   5,   1,   6,   1,  10,   6,  -1},
-	{   4,   5,   7,   4,   7,   3,   4,   3,  10,   6,   2,   8,  -1,  -1,  -1,  -1},
-	{  10,   5,   3,   3,   5,   7,   5,  10,   6,   6,   2,   0,   6,   0,   5,  -1},
-	{   8,   6,   2,   4,   3,  10,   4,   7,   3,   4,   0,   7,   0,   9,   7,  -1},
-	{   9,   7,  10,  10,   7,   3,  10,   6,   9,   6,   2,   9,  -1,  -1,  -1,  -1},
-	{   3,  11,   9,   2,   3,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   8,   0,   2,   3,  11,   2,  11,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   2,   3,   0,   3,  11,   0,  11,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   3,   8,   8,   3,  11,   8,  11,   4,   4,  11,   5,  -1,  -1,  -1,  -1},
-	{   2,   3,   1,   2,   1,   5,   2,   5,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   3,   1,   2,   1,   5,   2,   5,   9,   0,   4,   8,  -1,  -1,  -1,  -1},
-	{   0,   2,   3,   0,   3,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   3,   8,   8,   3,   4,   1,   4,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,  10,   4,   9,   2,   3,  11,   9,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   0,  10,   0,   1,   3,  11,   9,   2,   3,   9,  -1,  -1,  -1,  -1},
-	{   0,   2,   3,   0,   3,  11,   0,  11,   5,   1,  10,   4,  -1,  -1,  -1,  -1},
-	{   5,   2,  11,  11,   2,   3,   2,   5,   1,   1,  10,   8,   1,   8,   2,  -1},
-	{  10,   2,   3,   9,   2,  10,   4,   9,  10,   5,   9,   4,  -1,  -1,  -1,  -1},
-	{   5,  10,   0,   0,  10,   8,  10,   5,   9,   9,   2,   3,   9,   3,  10,  -1},
-	{   0,   2,   4,   4,   2,  10,   3,  10,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   8,   2,  10,   2,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   9,   8,   3,  11,   8,   6,   3,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,  11,   9,   3,  11,   0,   4,   3,   0,   6,   3,   4,  -1,  -1,  -1,  -1},
-	{  11,   5,   3,   5,   0,   3,   0,   6,   3,   0,   8,   6,  -1,  -1,  -1,  -1},
-	{   3,   4,   6,  11,   4,   3,   4,  11,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   1,   6,   6,   1,   5,   6,   5,   8,   8,   5,   9,  -1,  -1,  -1,  -1},
-	{   0,   6,   9,   4,   6,   0,   5,   9,   6,   3,   1,   5,   6,   3,   5,  -1},
-	{   3,   1,   6,   6,   1,   8,   0,   8,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   1,   4,   3,   4,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   9,   8,   3,  11,   8,   6,   3,   8,   4,   1,  10,  -1,  -1,  -1,  -1},
-	{   3,   9,   6,  11,   9,   3,  10,   6,   9,   0,   1,  10,   9,   0,  10,  -1},
-	{   4,   1,  10,  11,   5,   3,   5,   0,   3,   0,   6,   3,   0,   8,   6,  -1},
-	{   5,  10,   6,   1,  10,   5,   6,  11,   5,   6,   3,  11,  -1,  -1,  -1,  -1},
-	{  10,   5,   3,   4,   5,  10,   6,   3,   5,   9,   8,   6,   5,   9,   6,  -1},
-	{   6,   3,  10,   9,   0,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,  10,   0,   0,  10,   4,   0,   8,   3,   8,   6,   3,  -1,  -1,  -1,  -1},
-	{   6,   3,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   6,  10,   0,   4,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,  10,   3,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   6,  10,   8,   9,   5,   8,   5,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   1,   5,  10,   3,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,   1,   5,  11,  10,   3,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   0,   9,  11,   1,   0,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,  11,   8,  11,   1,   8,   1,   4,  10,   3,   6,  -1,  -1,  -1,  -1},
-	{   4,   1,   3,   6,   4,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   1,   3,   8,   0,   3,   6,   8,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   3,   6,   4,   1,   3,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,   6,   6,   9,   5,   6,   5,   3,   3,   5,   1,  -1,  -1,  -1,  -1},
-	{   6,   4,   5,   6,   5,  11,   6,  11,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   6,   8,   0,   3,   6,   0,   5,   3,   5,  11,   3,  -1,  -1,  -1,  -1},
-	{   3,   9,  11,   0,   9,   3,   6,   0,   3,   4,   0,   6,  -1,  -1,  -1,  -1},
-	{   8,   9,   6,   6,   9,   3,  11,   3,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   8,  10,   3,   2,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   2,   0,  10,   3,   0,   4,  10,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   8,  10,   3,   8,   3,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   3,   2,  10,   3,   9,   5,  10,   9,   4,  10,   5,  -1,  -1,  -1,  -1},
-	{  11,   1,   5,   2,   8,  10,   3,   2,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   2,   0,  10,   3,   0,   4,  10,   0,   5,  11,   1,  -1,  -1,  -1,  -1},
-	{   9,  11,   1,   9,   1,   0,   2,   8,  10,   3,   2,  10,  -1,  -1,  -1,  -1},
-	{  10,   2,   4,   3,   2,  10,   1,   4,   2,   9,  11,   1,   2,   9,   1,  -1},
-	{   1,   3,   2,   4,   1,   2,   8,   4,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   1,   3,   2,   0,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   3,   2,   4,   1,   2,   8,   4,   2,   9,   5,   0,  -1,  -1,  -1,  -1},
-	{   9,   3,   2,   5,   3,   9,   3,   5,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   2,  11,  11,   2,   8,  11,   8,   5,   5,   8,   4,  -1,  -1,  -1,  -1},
-	{   5,   2,   0,  11,   2,   5,   2,  11,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   3,   8,   8,   3,   2,   3,   4,   0,   0,   9,  11,   0,  11,   3,  -1},
-	{   9,  11,   3,   9,   3,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   9,   2,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   2,   7,  10,   3,   6,   0,   4,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   7,   5,   0,   7,   0,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   5,   4,   2,   7,   4,   8,   2,   4,  10,   3,   6,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   9,   2,   7,   1,   5,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   9,   2,   7,   1,   5,  11,   0,   4,   8,  -1,  -1,  -1,  -1},
-	{   1,   0,   2,   1,   2,   7,   1,   7,  11,   3,   6,  10,  -1,  -1,  -1,  -1},
-	{  10,   3,   6,   1,   7,  11,   1,   2,   7,   1,   4,   2,   4,   8,   2,  -1},
-	{   9,   2,   7,   6,   4,   1,   6,   1,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   1,   3,   8,   0,   3,   6,   8,   3,   7,   9,   2,  -1,  -1,  -1,  -1},
-	{   0,   2,   7,   0,   7,   5,   4,   1,   3,   6,   4,   3,  -1,  -1,  -1,  -1},
-	{   2,   5,   8,   7,   5,   2,   6,   8,   5,   1,   3,   6,   5,   1,   6,  -1},
-	{   6,   4,   5,   6,   5,  11,   6,  11,   3,   7,   9,   2,  -1,  -1,  -1,  -1},
-	{   9,   2,   7,   0,   6,   8,   0,   3,   6,   0,   5,   3,   5,  11,   3,  -1},
-	{   3,   4,  11,   6,   4,   3,   7,  11,   4,   0,   2,   7,   4,   0,   7,  -1},
-	{  11,   3,   8,   8,   3,   6,   8,   2,  11,   2,   7,  11,  -1,  -1,  -1,  -1},
-	{   9,   8,  10,   7,   9,  10,   3,   7,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   0,   7,   0,   4,   7,   4,   3,   7,   4,  10,   3,  -1,  -1,  -1,  -1},
-	{   8,  10,   0,   0,  10,   3,   0,   3,   5,   5,   3,   7,  -1,  -1,  -1,  -1},
-	{  10,   5,   4,   3,   5,  10,   5,   3,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   8,  10,   7,   9,  10,   3,   7,  10,   1,   5,  11,  -1,  -1,  -1,  -1},
-	{   1,   5,  11,   9,   0,   7,   0,   4,   7,   4,   3,   7,   4,  10,   3,  -1},
-	{  11,   0,   7,   1,   0,  11,   3,   7,   0,   8,  10,   3,   0,   8,   3,  -1},
-	{   7,   1,   4,  11,   1,   7,   4,   3,   7,   4,  10,   3,  -1,  -1,  -1,  -1},
-	{   4,   9,   8,   7,   9,   4,   1,   7,   4,   3,   7,   1,  -1,  -1,  -1,  -1},
-	{   7,   1,   3,   9,   1,   7,   1,   9,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   7,   0,   0,   7,   5,   7,   8,   4,   4,   1,   3,   4,   3,   7,  -1},
-	{   5,   1,   3,   7,   5,   3,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   4,  11,  11,   4,   5,   4,   3,   7,   7,   9,   8,   7,   8,   4,  -1},
-	{   3,   9,   0,   7,   9,   3,   0,  11,   3,   0,   5,  11,  -1,  -1,  -1,  -1},
-	{   3,   7,  11,   8,   4,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   3,   7,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,  10,  11,   7,   6,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,  10,  11,   7,  10,   7,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   5,   0,   6,  10,  11,   7,   6,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,   5,   8,   5,   4,   6,  10,  11,   7,   6,  11,  -1,  -1,  -1,  -1},
-	{   5,   7,   6,   5,   6,  10,   5,  10,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   7,   6,   5,   6,  10,   5,  10,   1,   4,   8,   0,  -1,  -1,  -1,  -1},
-	{   1,   0,  10,  10,   0,   9,  10,   9,   6,   6,   9,   7,  -1,  -1,  -1,  -1},
-	{   1,   7,  10,  10,   7,   6,   7,   1,   4,   4,   8,   9,   4,   9,   7,  -1},
-	{   7,   6,   4,   7,   4,   1,   7,   1,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   0,   1,   8,   0,  11,   7,   8,  11,   6,   8,   7,  -1,  -1,  -1,  -1},
-	{   7,   6,   4,   7,   4,   1,   7,   1,  11,   5,   0,   9,  -1,  -1,  -1,  -1},
-	{  11,   6,   1,   7,   6,  11,   5,   1,   6,   8,   9,   5,   6,   8,   5,  -1},
-	{   4,   5,   7,   4,   7,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   7,   0,   0,   7,   8,   6,   8,   7,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   6,   9,   9,   6,   0,   4,   0,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   9,   7,   8,   7,   6,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,  10,  11,   2,   8,  11,   7,   2,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,  11,   4,   4,  11,   7,   4,   7,   0,   0,   7,   2,  -1,  -1,  -1,  -1},
-	{   8,  10,  11,   2,   8,  11,   7,   2,  11,   5,   0,   9,  -1,  -1,  -1,  -1},
-	{   9,   4,   2,   5,   4,   9,   7,   2,   4,  10,  11,   7,   4,  10,   7,  -1},
-	{   1,   8,  10,   2,   8,   1,   5,   2,   1,   7,   2,   5,  -1,  -1,  -1,  -1},
-	{   1,   7,  10,   5,   7,   1,   4,  10,   7,   2,   0,   4,   7,   2,   4,  -1},
-	{   7,   1,   9,   9,   1,   0,   1,   7,   2,   2,   8,  10,   2,  10,   1,  -1},
-	{   7,   2,   9,  10,   1,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   4,   2,   4,   1,   2,   1,   7,   2,   1,  11,   7,  -1,  -1,  -1,  -1},
-	{  11,   0,   1,   7,   0,  11,   0,   7,   2,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   0,   9,   8,   4,   2,   4,   1,   2,   1,   7,   2,   1,  11,   7,  -1},
-	{   2,   5,   1,   9,   5,   2,   1,   7,   2,   1,  11,   7,  -1,  -1,  -1,  -1},
-	{   4,   5,   8,   8,   5,   2,   7,   2,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   2,   0,   5,   7,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   7,   2,   4,   4,   2,   8,   4,   0,   7,   0,   9,   7,  -1,  -1,  -1,  -1},
-	{   7,   2,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,  11,   9,   6,  10,   9,   2,   6,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,  11,   9,   6,  10,   9,   2,   6,   9,   0,   4,   8,  -1,  -1,  -1,  -1},
-	{   5,  10,  11,   6,  10,   5,   0,   6,   5,   2,   6,   0,  -1,  -1,  -1,  -1},
-	{   2,   5,   8,   8,   5,   4,   5,   2,   6,   6,  10,  11,   6,  11,   5,  -1},
-	{  10,   1,   6,   1,   5,   6,   5,   2,   6,   5,   9,   2,  -1,  -1,  -1,  -1},
-	{   0,   4,   8,  10,   1,   6,   1,   5,   6,   5,   2,   6,   5,   9,   2,  -1},
-	{   1,   0,  10,  10,   0,   6,   2,   6,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   6,   1,   1,   6,  10,   1,   4,   2,   4,   8,   2,  -1,  -1,  -1,  -1},
-	{  11,   9,   1,   1,   9,   2,   1,   2,   4,   4,   2,   6,  -1,  -1,  -1,  -1},
-	{   8,   1,   6,   0,   1,   8,   2,   6,   1,  11,   9,   2,   1,  11,   2,  -1},
-	{  11,   6,   1,   1,   6,   4,   6,  11,   5,   5,   0,   2,   5,   2,   6,  -1},
-	{   2,   6,   8,  11,   5,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   6,   4,   2,   2,   4,   9,   5,   9,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   9,   6,   6,   9,   2,   6,   8,   5,   8,   0,   5,  -1,  -1,  -1,  -1},
-	{   0,   2,   6,   0,   6,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   2,   6,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,  10,  11,   9,   8,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   0,  11,   9,   4,  11,   0,  11,   4,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,  10,  11,   0,  10,   5,  10,   0,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,  10,  11,   5,   4,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,   8,  10,   5,   8,   1,   8,   5,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   4,  10,   0,   4,   9,  10,   5,   9,  10,   1,   5,  -1,  -1,  -1,  -1},
-	{   0,   8,  10,   1,   0,  10,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  10,   1,   4,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   4,   9,   8,   1,   9,   4,   9,   1,  11,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   1,  11,   9,   0,   1,   9,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  11,   0,   8,   5,   0,  11,   8,   1,  11,   8,   4,   1,  -1,  -1,  -1,  -1},
-	{  11,   5,   1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   5,   9,   8,   4,   5,   8,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   9,   0,   5,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{   8,   4,   0,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1},
-	{  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1}
-};
-const int MarchingCubes::cornerMap[Cube::CORNERS]={0,1,3,2,4,5,7,6};
-double MarchingCubes::vertexList[Cube::EDGES][3];
-
-unsigned char MarchingCubes::GetIndex(const double v[Cube::CORNERS],double iso)
-{
-	unsigned char idx=0;
-	if (v[Cube::CornerIndex(0,0,0)] < iso) idx |=   1;
-	if (v[Cube::CornerIndex(1,0,0)] < iso) idx |=   2;
-	if (v[Cube::CornerIndex(1,1,0)] < iso) idx |=   4;
-	if (v[Cube::CornerIndex(0,1,0)] < iso) idx |=   8;
-	if (v[Cube::CornerIndex(0,0,1)] < iso) idx |=  16;
-	if (v[Cube::CornerIndex(1,0,1)] < iso) idx |=  32;
-	if (v[Cube::CornerIndex(1,1,1)] < iso) idx |=  64;
-	if (v[Cube::CornerIndex(0,1,1)] < iso) idx |= 128;
-	return idx;
-}
-unsigned char MarchingCubes::GetFaceIndex( const double values[Cube::CORNERS] , double iso , int faceIndex )
-{
-	int i,j,x,y,z;
-	unsigned char idx=0;
-	double v[2][2];
-	Cube::FactorFaceIndex(faceIndex,x,y,z);
-	if		(x<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(0,i,j)];}}}
-	else if	(x>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(1,i,j)];}}}
-	else if	(y<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,0,j)];}}}
-	else if	(y>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,1,j)];}}}
-	else if	(z<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,j,0)];}}}
-	else if	(z>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,j,1)];}}}
-	if (v[0][0] < iso) idx |=   1;
-	if (v[1][0] < iso) idx |=   2;
-	if (v[1][1] < iso) idx |=   4;
-	if (v[0][1] < iso) idx |=   8;
-	return idx;
-}
-bool MarchingCubes::IsAmbiguous( const double v[Cube::CORNERS] , double isoValue , int faceIndex ){ return MarchingSquares::IsAmbiguous( GetFaceIndex( v , isoValue , faceIndex ) ); }
-bool MarchingCubes::HasRoots( const double v[Cube::CORNERS] , double isoValue , int faceIndex ){ return MarchingSquares::HasRoots( GetFaceIndex( v , isoValue , faceIndex ) ); }
-bool MarchingCubes::HasRoots( const double v[Cube::CORNERS] , double isoValue ){ return HasRoots( GetIndex( v , isoValue ) ); }
-bool MarchingCubes::HasRoots( unsigned char mcIndex ){ return !(mcIndex==0 || mcIndex==255); }
-int MarchingCubes::AddTriangles( const double v[Cube::CORNERS] , double iso , Triangle* isoTriangles )
-{
-	unsigned char idx;
-	int ntriang=0;
-	Triangle tri;
-
-	idx=GetIndex(v,iso);
-
-	/* Cube is entirely in/out of the surface */
-	if (!edgeMask[idx]) return 0;
-
-	/* Find the vertices where the surface intersects the cube */
-	int i,j,ii=1;
-	for(i=0;i<12;i++){
-		if(edgeMask[idx] & ii){SetVertex(i,v,iso);}
-		ii<<=1;
-	}
-	/* Create the triangle */
-	for( i=0 ; triangles[idx][i]!=-1 ; i+=3 )
-	{
-		for(j=0;j<3;j++){
-			tri.p[0][j]=vertexList[triangles[idx][i+0]][j];
-			tri.p[1][j]=vertexList[triangles[idx][i+1]][j];
-			tri.p[2][j]=vertexList[triangles[idx][i+2]][j];
-		}
-		isoTriangles[ntriang++]=tri;
-	}
-	return ntriang;
-}
-
-int MarchingCubes::AddTriangleIndices(const double v[Cube::CORNERS],double iso,int* isoIndices){
-	unsigned char idx;
-	int ntriang=0;
-
-	idx=GetIndex(v,iso);
-
-	/* Cube is entirely in/out of the surface */
-	if (!edgeMask[idx]) return 0;
-
-	/* Create the triangle */
-	for(int i=0;triangles[idx][i]!=-1;i+=3){
-		for(int j=0;j<3;j++){isoIndices[i+j]=triangles[idx][i+j];}
-		ntriang++;
-	}
-	return ntriang;
-}
-
-void MarchingCubes::SetVertex( int e , const double values[Cube::CORNERS] , double iso )
-{
-	double t;
-	int o , i1 , i2;
-	Cube::FactorEdgeIndex( e , o , i1 , i2 );
-	switch( o )
-	{
-	case 0:
-		t = Interpolate( values[ Cube::CornerIndex( 0 , i1 , i2 ) ] - iso , values[ Cube::CornerIndex( 1 , i1 , i2 ) ] - iso );
-		vertexList[e][0] = t , vertexList[e][1] = i1  , vertexList[e][2] = i2;
-		break;
-	case 1:
-		t = Interpolate( values[ Cube::CornerIndex( i1 , 0 , i2 ) ] - iso , values[ Cube::CornerIndex( i1 , 1 , i2 ) ] - iso );
-		vertexList[e][0] = i1 , vertexList[e][1] = t  , vertexList[e][2] = i2;
-		break;
-	case 2:
-		t = Interpolate( values[ Cube::CornerIndex( i1 , i2 , 0 ) ] - iso , values[ Cube::CornerIndex( i1 , i2 , 1 ) ] - iso );
-		vertexList[e][0] = i1 , vertexList[e][1] = i2  , vertexList[e][2] = t;
-		break;
-	}
-}
-double MarchingCubes::Interpolate( double v1 , double v2 ) { return v1/(v1-v2); }
-
-
-///////////////////////////////////
-unsigned char MarchingCubes::GetIndex(const float v[Cube::CORNERS],float iso){
-	unsigned char idx=0;
-	if (v[Cube::CornerIndex(0,0,0)] < iso) idx |=   1;
-	if (v[Cube::CornerIndex(1,0,0)] < iso) idx |=   2;
-	if (v[Cube::CornerIndex(1,1,0)] < iso) idx |=   4;
-	if (v[Cube::CornerIndex(0,1,0)] < iso) idx |=   8;
-	if (v[Cube::CornerIndex(0,0,1)] < iso) idx |=  16;
-	if (v[Cube::CornerIndex(1,0,1)] < iso) idx |=  32;
-	if (v[Cube::CornerIndex(1,1,1)] < iso) idx |=  64;
-	if (v[Cube::CornerIndex(0,1,1)] < iso) idx |= 128;
-	return idx;
-}
-unsigned char MarchingCubes::GetFaceIndex( const float values[Cube::CORNERS] , float iso , int faceIndex )
-{
-	int i,j,x,y,z;
-	unsigned char idx=0;
-	double v[2][2];
-	Cube::FactorFaceIndex(faceIndex,x,y,z);
-	if		(x<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(0,i,j)];}}}
-	else if	(x>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(1,i,j)];}}}
-	else if	(y<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,0,j)];}}}
-	else if	(y>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,1,j)];}}}
-	else if	(z<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,j,0)];}}}
-	else if	(z>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=values[Cube::CornerIndex(i,j,1)];}}}
-	if (v[0][0] < iso) idx |=   1;
-	if (v[1][0] < iso) idx |=   2;
-	if (v[1][1] < iso) idx |=   4;
-	if (v[0][1] < iso) idx |=   8;
-	return idx;
-}
-unsigned char MarchingCubes::GetFaceIndex( unsigned char mcIndex , int faceIndex )
-{
-	int i,j,x,y,z;
-	unsigned char idx=0;
-	int v[2][2];
-	Cube::FactorFaceIndex(faceIndex,x,y,z);
-	if		(x<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(0,i,j)]);}}}
-	else if	(x>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(1,i,j)]);}}}
-	else if	(y<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(i,0,j)]);}}}
-	else if	(y>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(i,1,j)]);}}}
-	else if	(z<0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(i,j,1)]);}}}
-	else if	(z>0){for(i=0;i<2;i++){for(j=0;j<2;j++){v[i][j]=mcIndex&(1<<MarchingCubes::cornerMap[Cube::CornerIndex(i,j,1)]);}}}
-	if (v[0][0]) idx |=   1;
-	if (v[1][0]) idx |=   2;
-	if (v[1][1]) idx |=   4;
-	if (v[0][1]) idx |=   8;
-	return idx;
-}
-bool MarchingCubes::IsAmbiguous( const float v[Cube::CORNERS] , float isoValue , int faceIndex ){ return MarchingSquares::IsAmbiguous( GetFaceIndex( v , isoValue , faceIndex ) ); }
-bool MarchingCubes::IsAmbiguous( unsigned char mcIndex , int faceIndex ){ return MarchingSquares::IsAmbiguous( GetFaceIndex( mcIndex , faceIndex ) ); }
-bool MarchingCubes::HasRoots( const float v[Cube::CORNERS] , float isoValue ){ return HasRoots( GetIndex( v , isoValue ) ); }
-bool MarchingCubes::HasRoots( const float v[Cube::CORNERS] , float isoValue , int faceIndex){ return MarchingSquares::HasRoots( GetFaceIndex( v , isoValue , faceIndex ) ); }
-bool MarchingCubes::HasFaceRoots( unsigned char mcIndex , int faceIndex ){ return MarchingSquares::HasRoots( GetFaceIndex( mcIndex , faceIndex ) ); }
-bool MarchingCubes::HasEdgeRoots( unsigned char mcIndex , int edgeIndex )
-{
-	int c1 , c2;
-	Cube::EdgeCorners( edgeIndex , c1 , c2 );
-	return !( 
-		( ( mcIndex&(1<<MarchingCubes::cornerMap[c1]) ) &&  ( mcIndex&(1<<MarchingCubes::cornerMap[c2])) )
-		||
-		(!( mcIndex&(1<<MarchingCubes::cornerMap[c1]) ) && !( mcIndex&(1<<MarchingCubes::cornerMap[c2])) )
-		);
-}
-int MarchingCubes::AddTriangles(const float v[Cube::CORNERS],float iso,Triangle* isoTriangles){
-	unsigned char idx;
-	int ntriang=0;
-	Triangle tri;
-
-	idx = GetIndex( v , iso );
-
-	/* Cube is entirely in/out of the surface */
-	if( !edgeMask[idx] ) return 0;
-
-	/* Find the vertices where the surface intersects the cube */
-	int i,j,ii=1;
-	for( i=0 ; i<12 ; i++ )
-	{
-		if( edgeMask[idx] & ii ) SetVertex( i , v , iso );
-		ii<<=1;
-	}
-	/* Create the triangle */
-	for (i=0;triangles[idx][i]!=-1;i+=3) {
-		for(j=0;j<3;j++){
-			tri.p[0][j]=vertexList[triangles[idx][i+0]][j];
-			tri.p[1][j]=vertexList[triangles[idx][i+1]][j];
-			tri.p[2][j]=vertexList[triangles[idx][i+2]][j];
-		}
-		isoTriangles[ntriang++]=tri;
-	}
-	return ntriang;
-}
-
-int MarchingCubes::AddTriangleIndices( const float v[Cube::CORNERS] , float iso , int* isoIndices ){ return AddTriangleIndices( GetIndex( v , iso ) , isoIndices ); }
-int MarchingCubes::AddTriangleIndices( int idx , int* isoIndices )
-{
-	int ntriang=0;
-
-	/* Cube is entirely in/out of the surface */
-	if (!edgeMask[idx]) return 0;
-
-	/* Create the triangle */
-	for(int i=0;triangles[idx][i]!=-1;i+=3){
-		for(int j=0;j<3;j++){isoIndices[i+j]=triangles[idx][i+j];}
-		ntriang++;
-	}
-	return ntriang;
-}
-
-void MarchingCubes::SetVertex( int e , const float values[Cube::CORNERS] , float iso )
-{
-	double t;
-	int o , i1 , i2;
-	Cube::FactorEdgeIndex( e , o , i1 , i2 );
-	switch( o )
-	{
-	case 0:
-		t = Interpolate( values[ Cube::CornerIndex( 0 , i1 , i2 ) ] - iso , values[ Cube::CornerIndex( 1 , i1 , i2 ) ] - iso );
-		vertexList[e][0] = t , vertexList[e][1] = i1  , vertexList[e][2] = i2;
-		break;
-	case 1:
-		t = Interpolate( values[ Cube::CornerIndex( i1 , 0 , i2 ) ] - iso , values[ Cube::CornerIndex( i1 , 1 , i2 ) ] - iso );
-		vertexList[e][0] = i1 , vertexList[e][1] = t  , vertexList[e][2] = i2;
-		break;
-	case 2:
-		t = Interpolate( values[ Cube::CornerIndex( i1 , i2 , 0 ) ] - iso , values[ Cube::CornerIndex( i1 , i2 , 1 ) ] - iso );
-		vertexList[e][0] = i1 , vertexList[e][1] = i2  , vertexList[e][2] = t;
-		break;
-	}
-}
-float MarchingCubes::Interpolate( float v1 , float v2 ){ return v1/(v1-v2); }
diff --git a/Src/MarchingCubes.h b/Src/MarchingCubes.h
index 56c6eb3..a4b0e5a 100644
--- a/Src/MarchingCubes.h
+++ b/Src/MarchingCubes.h
@@ -28,120 +28,728 @@ DAMAGE.
 
 #ifndef MARCHING_CUBES_INCLUDED
 #define MARCHING_CUBES_INCLUDED
-#include <vector>
+
+#include <stdio.h>
+#include <type_traits>
 #include "Geometry.h"
 
-#define NEW_ORDERING 1
+#include "Window.h"
 
-class Square
-{
-public:
-	const static unsigned int CORNERS=4 , EDGES=4 , FACES=1;
-	static int  CornerIndex			(int x,int y);
-	static int  AntipodalCornerIndex(int idx);
-	static void FactorCornerIndex	(int idx,int& x,int& y);
-	static int  EdgeIndex			(int orientation,int i);
-	static void FactorEdgeIndex		(int idx,int& orientation,int& i);
-
-	static int  ReflectCornerIndex	(int idx,int edgeIndex);
-	static int  ReflectEdgeIndex	(int idx,int edgeIndex);
-
-	static void EdgeCorners(int idx,int& c1,int &c2);
-};
-
-class Cube{
-public:
-	const static unsigned int CORNERS=8 , EDGES=12 , FACES=6;
-
-	static int  CornerIndex			( int x , int y , int z );
-	static void FactorCornerIndex	( int idx , int& x , int& y , int& z );
-	static int  EdgeIndex			( int orientation , int i , int j );
-	static void FactorEdgeIndex		( int idx , int& orientation , int& i , int &j);
-	static int  FaceIndex			( int dir , int offSet );
-	static int  FaceIndex			( int x , int y , int z );
-	static void FactorFaceIndex		( int idx , int& x , int &y , int& z );
-	static void FactorFaceIndex		( int idx , int& dir , int& offSet );
-
-	static int  AntipodalCornerIndex	( int idx );
-	static int  FaceReflectCornerIndex	( int idx , int faceIndex );
-	static int  FaceReflectEdgeIndex	( int idx , int faceIndex );
-	static int	FaceReflectFaceIndex	( int idx , int faceIndex );
-	static int	EdgeReflectCornerIndex	( int idx , int edgeIndex );
-	static int	EdgeReflectEdgeIndex	( int edgeIndex );
-
-	static int  FaceAdjacentToEdges	( int eIndex1 , int eIndex2 );
-	static void FacesAdjacentToEdge	( int eIndex , int& f1Index , int& f2Index );
-
-	static void EdgeCorners( int idx , int& c1 , int &c2 );
-	static void FaceCorners( int idx , int& c1 , int &c2 , int& c3 , int& c4 );
-
-	static bool IsEdgeCorner( int cIndex , int e );
-	static bool IsFaceCorner( int cIndex , int f );
-};
-
-class MarchingSquares
-{
-	static double Interpolate(double v1,double v2);
-	static void SetVertex(int e,const double values[Square::CORNERS],double iso);
-public:
-	const static unsigned int MAX_EDGES=2;
-	static const int edgeMask[1<<Square::CORNERS];
-	static const int edges[1<<Square::CORNERS][2*MAX_EDGES+1];
-	static double vertexList[Square::EDGES][2];
-#if NEW_ORDERING
-	static const int cornerMap[Square::CORNERS];
-#endif // NEW_ORDERING
-
-	static unsigned char GetIndex( const float  values[Square::CORNERS] , float  iso );
-	static unsigned char GetIndex( const double values[Square::CORNERS] , double iso );
-	static bool IsAmbiguous( const double v[Square::CORNERS] , double isoValue );
-	static bool IsAmbiguous( unsigned char idx );
-	static bool HasRoots( unsigned char mcIndex );
-#if NEW_ORDERING
-	static bool HasEdgeRoots( unsigned char mcIndex , int edgeIndex );
-#endif // NEW_ORDERING
-	static int AddEdges( const double v[Square::CORNERS] , double isoValue , Edge* edges );
-	static int AddEdgeIndices( const double v[Square::CORNERS] , double isoValue , int* edges);
-	static int AddEdgeIndices( unsigned char mcIndex , int* edges);
-};
-
-class MarchingCubes
+namespace HyperCube
 {
-	static void SetVertex(int e,const double values[Cube::CORNERS],double iso);
-	static unsigned char GetFaceIndex( const double values[Cube::CORNERS] , double iso , int faceIndex );
-
-	static void SetVertex(int e,const float values[Cube::CORNERS],float iso);
-	static unsigned char GetFaceIndex( const float values[Cube::CORNERS] , float iso , int faceIndex );
-
-public:
-	static unsigned char GetFaceIndex( unsigned char mcIndex , int faceIndex );
-	static double Interpolate(double v1,double v2);
-	static float Interpolate(float v1,float v2);
-	const static unsigned int MAX_TRIANGLES=5;
-	static const int edgeMask[1<<Cube::CORNERS];
-	static const int triangles[1<<Cube::CORNERS][3*MAX_TRIANGLES+1];
-	static const int cornerMap[Cube::CORNERS];
-	static double vertexList[Cube::EDGES][3];
-
-	static int AddTriangleIndices(int mcIndex,int* triangles);
-
-	static unsigned char GetIndex( const double values[Cube::CORNERS] , double iso );
-	static bool IsAmbiguous( const double v[Cube::CORNERS] , double isoValue , int faceIndex );
-	static bool HasRoots( const double v[Cube::CORNERS] , double isoValue );
-	static bool HasRoots( const double v[Cube::CORNERS] , double isoValue , int faceIndex );
-	static int AddTriangles( const double v[Cube::CORNERS] , double isoValue , Triangle* triangles );
-	static int AddTriangleIndices( const double v[Cube::CORNERS] , double isoValue , int* triangles );
-
-	static unsigned char GetIndex( const float values[Cube::CORNERS] , float iso );
-	static bool IsAmbiguous( const float v[Cube::CORNERS] , float isoValue , int faceIndex );
-	static bool HasRoots( const float v[Cube::CORNERS] , float isoValue );
-	static bool HasRoots( const float v[Cube::CORNERS] , float isoValue , int faceIndex );
-	static int AddTriangles( const float v[Cube::CORNERS] , float isoValue , Triangle* triangles );
-	static int AddTriangleIndices( const float v[Cube::CORNERS] , float isoValue , int* triangles );
-
-	static bool IsAmbiguous( unsigned char mcIndex , int faceIndex );
-	static bool HasRoots( unsigned char mcIndex );
-	static bool HasFaceRoots( unsigned char mcIndex , int faceIndex );
-	static bool HasEdgeRoots( unsigned char mcIndex , int edgeIndex );
-};
+	enum Direction{ BACK , CROSS , FRONT }; // Placement of an element along one axis: in the back face, spanning the axis, or in the front face
+	inline Direction Opposite( Direction dir ){ return dir==BACK ? FRONT : ( dir==FRONT ? BACK : CROSS ); } // Swaps BACK and FRONT; any other value (i.e. CROSS) maps to CROSS
+
+	// The number of k-dimensional elements in a d-dimensional cube, N(d,k), satisfies N(d,k) = 2*N(d-1,k) + N(d-1,k-1):
+	// every k-dimensional element of the (d-1)-dimensional cube appears twice (in the back and front faces) and every (k-1)-dimensional element is extruded once across the d-th axis
+	
+	// Number of elements of dimension K in a cube of dimension D, computed at compile time by recursion on D
+	template< unsigned int D , unsigned int K > struct ElementNum         { static const unsigned int Value = 2 * ElementNum< D-1 , K >::Value + ElementNum< D-1 , K-1 >::Value; };
+	template< unsigned int D                  > struct ElementNum< D , 0 >{ static const unsigned int Value = 2 * ElementNum< D-1 , 0 >::Value; }; // Corners: the count doubles with every added dimension
+	template< unsigned int D                  > struct ElementNum< D , D >{ static const unsigned int Value = 1; }; // The cube itself is its only D-dimensional element
+	template<                                 > struct ElementNum< 0 , 0 >{ static const unsigned int Value = 1; }; // Base case: the 0-dimensional cube is a single point
+	// [WARNING] A 0-dimensional cube with K>0 shouldn't really happen, but the case is needed to support the definition of OverlapElementNum
+	template<                  unsigned int K > struct ElementNum< 0 , K >{ static const unsigned int Value = K==0 ? 1 : 0; };
+
+	template< unsigned int D , unsigned int K1 , unsigned int K2 > struct OverlapElementNum             { static const unsigned int Value = K1>=K2 ? ElementNum< K1 , K2 >::Value : OverlapElementNum< D-1 , K1 , K2 >::Value + OverlapElementNum< D-1 , K1 , K2-1 >::Value; }; // Number of K2-dimensional elements overlapping a K1-dimensional element of a D-dimensional cube; for K1>=K2 this is the number of K2-dimensional elements of a K1-dimensional cube, otherwise it is found by recursion on D and K2
+	template< unsigned int D ,                   unsigned int K  > struct OverlapElementNum< D , D , K >{ static const unsigned int Value = ElementNum< D , K >::Value; }; // K1==D: same count as all K-dimensional elements of the cube
+	template< unsigned int D                                     > struct OverlapElementNum< D , D , 0 >{ static const unsigned int Value = ElementNum< D , 0 >::Value; };
+	template< unsigned int D , unsigned int K                    > struct OverlapElementNum< D , K , 0 >{ static const unsigned int Value = ElementNum< K , 0 >::Value; }; // K2==0: the corners of a K-dimensional cube
+	template< unsigned int D , unsigned int K                    > struct OverlapElementNum< D , K , K >{ static const unsigned int Value = 1; }; // K1==K2: a single element
+	template< unsigned int D , unsigned int K                    > struct OverlapElementNum< D , K , D >{ static const unsigned int Value = 1; }; // K2==D: a single element
+	template< unsigned int D                                     > struct OverlapElementNum< D , D , D >{ static const unsigned int Value = 1; };
+	template< unsigned int D                                     > struct OverlapElementNum< D , 0 , 0 >{ static const unsigned int Value = 1; };
+
+	template< unsigned int D >
+	struct Cube // Indexing of, and relations between, the K-dimensional elements of a D-dimensional cube (K=0: corners, K=D-1: faces)
+	{
+		// Corner index (x,y,z,...) -> x + 2*y + 4*z + ...
+		// CROSS -> the D-th axis
+
+		// Representation of a K-dimensional element of the cube
+		template< unsigned int K >
+		struct Element
+		{
+			static_assert( D>=K , "[ERROR] Element dimension exceeds cube dimension" );
+
+			// The index of the element, sorted as:
+			// 1. All K-dimensional elements contained in the back face
+			// 2. All K-dimensional elements spanning the D-th axis
+			// 3. All K-dimensional elements contained in the front face
+			unsigned int index;
+
+			// Initialize by index
+			Element( unsigned int idx=0 );
+
+			// Initialize by co-index:
+			// 1. A K-dimensional element in either BACK or FRONT
+			// 2. A (K-1)-dimensional element extruded across the D-th axis
+			Element( Direction dir , unsigned int coIndex );
+
+			// Given a K-Dimensional sub-element living inside a DK-dimensional sub-cube, get the element relative to the D-dimensional cube
+			template< unsigned int DK >
+			Element( Element< DK > subCube , typename Cube< DK >::template Element< K > subElement );
+
+			// Initialize by setting the directions
+			Element( const Direction dirs[D] );
+
+			// Print the element to the specified stream
+			void print( FILE* fp=stdout ) const;
+
+			// Factors the index into a direction and a co-index, written to the output arguments (the element itself is not modified)
+			void factor( Direction& dir , unsigned int& coIndex ) const;
+
+			// Returns the direction of the element with respect to the D-th axis (the direction produced by factor)
+			Direction direction( void ) const;
+
+			// Returns the co-index of the element
+			unsigned int coIndex( void ) const;
+
+			// Computes the direction of the element along each of the D axes; dirs must have room for D entries
+			void directions( Direction* dirs ) const;
+
+			// Returns the antipodal element
+			typename Cube< D >::template Element< K > antipodal( void ) const;
+
+			// Comparison operators
+			bool operator <  ( Element e ) const { return index< e.index; }
+			bool operator <= ( Element e ) const { return index<=e.index; }
+			bool operator >  ( Element e ) const { return index> e.index; }
+			bool operator >= ( Element e ) const { return index>=e.index; }
+			bool operator == ( Element e ) const { return index==e.index; }
+			bool operator != ( Element e ) const { return index!=e.index; }
+			bool operator <  ( unsigned int i ) const { return index< i; }
+			bool operator <= ( unsigned int i ) const { return index<=i; }
+			bool operator >  ( unsigned int i ) const { return index> i; }
+			bool operator >= ( unsigned int i ) const { return index>=i; }
+			bool operator == ( unsigned int i ) const { return index==i; }
+			bool operator != ( unsigned int i ) const { return index!=i; }
+
+			// Increment operators
+			Element& operator ++ ( void ) { index++ ; return *this; }
+			Element  operator ++ ( int ) { index++ ; return Element(index-1); }
+		protected:
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D!=0 && _K!=0 >::type _setElement( Direction dir , unsigned int coIndex );
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D!=0 && _K==0 >::type _setElement( Direction dir , unsigned int coIndex );
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D==0 && _K==0 >::type _setElement( Direction dir , unsigned int coIndex );
+
+			template< unsigned int KD > typename std::enable_if< (D> KD) && (KD>K) && K!=0 >::type _setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement );
+			template< unsigned int KD > typename std::enable_if< (D> KD) && (KD>K) && K==0 >::type _setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement );
+			template< unsigned int KD > typename std::enable_if< (D==KD) && (KD>K)         >::type _setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement );
+			template< unsigned int KD > typename std::enable_if< (KD==K)                   >::type _setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement );
+
+			template< unsigned int _D=D > typename std::enable_if< _D!=0 >::type _setElement( const Direction* dirs );
+			template< unsigned int _D=D > typename std::enable_if< _D==0 >::type _setElement( const Direction* dirs );
+
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D==_K          >::type _factor( Direction& dir , unsigned int& coIndex ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D!=_K && _K!=0 >::type _factor( Direction& dir , unsigned int& coIndex ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D!=_K && _K==0 >::type _factor( Direction& dir , unsigned int& coIndex ) const;
+
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< (_D>_K) && _K!=0 >::type _directions( Direction* dirs ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< (_D>_K) && _K==0 >::type _directions( Direction* dirs ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D==_K           >::type _directions( Direction* dirs ) const;
+
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< (_D>_K) && _K!=0 , Element >::type _antipodal( void ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< (_D>_K) && _K==0 , Element >::type _antipodal( void ) const;
+			template< unsigned int _D=D , unsigned int _K=K > typename std::enable_if< _D==_K           , Element >::type _antipodal( void ) const;
+		};
+		// A way of indexing the cubes incident on an element
+		template< unsigned int K > using IncidentCubeIndex = typename Cube< D-K >::template Element< 0 >;
+
+		// Number of elements of dimension K
+		template< unsigned int K > static constexpr unsigned int ElementNum( void ){ return HyperCube::ElementNum< D , K >::Value; }
+		// Number of cubes incident to an element of dimension K
+		template< unsigned int K > static constexpr unsigned int IncidentCubeNum( void ){ return HyperCube::ElementNum< D-K , 0 >::Value; }
+		// Number of overlapping elements of dimension K1 / K2
+		template< unsigned int K1 , unsigned int K2 > static constexpr unsigned int OverlapElementNum( void ){ return HyperCube::OverlapElementNum< D , K1 , K2 >::Value; }
+
+		// Is the face outward-facing
+		static bool IsOriented( Element< D-1 > e );
+
+		// Is one element contained in the other?
+		template< unsigned int K1 , unsigned int K2 >
+		static bool Overlap( Element< K1 > e1 , Element< K2 > e2 );
+
+		// If K1>K2: returns all elements contained in e
+		// Else:     returns all elements containing e
+		template< unsigned int K1 , unsigned int K2 >
+		static void OverlapElements( Element< K1 > e , Element< K2 >* es );
+
+		// Returns the marching-cubes index for the set of values
+		template< typename Real >
+		static unsigned int MCIndex( const Real values[ Cube::ElementNum< 0 >() ] , Real iso );
+
+		// Extracts the marching-cubes sub-index for the associated element
+		template< unsigned int K >
+		static unsigned int ElementMCIndex( Element< K > element , unsigned int mcIndex );
+
+		// Does the marching cubes index have a zero-crossing
+		static bool HasMCRoots( unsigned int mcIndex );
+
+		// Sets the offset of the incident cube relative to the center cube, x[i] \in {-1,0,1}
+		template< unsigned int K >
+		static void CellOffset( Element< K > e , IncidentCubeIndex< K > d , int x[D] );
+
+		// Returns the linearized offset of the incident cube relative to the center cube, \in [0,3^D)
+		template< unsigned int K >
+		static unsigned int CellOffset( Element< K > e , IncidentCubeIndex< K > d );
+
+		// Returns the index of the incident cube that is the source
+		template< unsigned int K >
+		static typename Cube< D >::template IncidentCubeIndex< K > IncidentCube( Element< K > e );
+
+		// Returns the corresponding element in the incident cube
+		template< unsigned int K >
+		static typename Cube< D >::template Element< K > IncidentElement( Element< K > e , IncidentCubeIndex< K > d );
+
+	protected:
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1>=K2) , bool >::type _Overlap( Element< K1 > e1 , Element< K2 > e2 );
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1< K2) , bool >::type _Overlap( Element< K1 > e1 , Element< K2 > e2 );
+
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1>=K2)                   >::type _OverlapElements( Element< K1 > e , Element< K2 >* es );
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1< K2) && D==K2          >::type _OverlapElements( Element< K1 > e , Element< K2 >* es );
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1< K2) && D!=K2 && K1!=0 >::type _OverlapElements( Element< K1 > e , Element< K2 >* es );
+		template< unsigned int K1 , unsigned int K2 > static typename std::enable_if< (K1< K2) && D!=K2 && K1==0 >::type _OverlapElements( Element< K1 > e , Element< K2 >* es );
+
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K                  , IncidentCubeIndex< K > >::type _IncidentCube( Element< K > e );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && _D!=0 && K!=0 , IncidentCubeIndex< K > >::type _IncidentCube( Element< K > e );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && _D!=0 && K==0 , IncidentCubeIndex< K > >::type _IncidentCube( Element< K > e );
+
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K         >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d , int* x );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K!=0 >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d , int* x );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K==0 >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d , int* x );
+
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K && K==0 , unsigned int >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K && K!=0 , unsigned int >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K!=0 , unsigned int >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K==0 , unsigned int >::type _CellOffset( Element< K > e , IncidentCubeIndex< K > d );
+
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K                  , Element< K > >::type _IncidentElement( Element< K > e , IncidentCubeIndex< K > d );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && _D!=0 && K!=0 , Element< K > >::type _IncidentElement( Element< K > e , IncidentCubeIndex< K > d );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && _D!=0 && K==0 , Element< K > >::type _IncidentElement( Element< K > e , IncidentCubeIndex< K > d );
+
+		template< unsigned int _D=D > static typename std::enable_if< _D!=1 >::type _FactorOrientation( Element< D-1 > e , unsigned int& dim , Direction& dir );
+		template< unsigned int _D=D > static typename std::enable_if< _D==1 >::type _FactorOrientation( Element< D-1 > e , unsigned int& dim , Direction& dir );
+
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K!=0 , unsigned int >::type _ElementMCIndex( Element< K > element , unsigned int mcIndex );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D!=K && K==0 , unsigned int >::type _ElementMCIndex( Element< K > element , unsigned int mcIndex );
+		template< unsigned int K , unsigned int _D=D > static typename std::enable_if< _D==K         , unsigned int >::type _ElementMCIndex( Element< K > element , unsigned int mcIndex );
+
+		template< unsigned int DD > friend struct Cube;
+	};
+
+	// Specialized class for extracting iso-curves from a square
+	struct MarchingSquares
+	{
+		const static unsigned int MAX_EDGES=2; // presumably the largest number of iso-edges a single square can produce -- confirm against the table definition
+		static const int edges[1<<HyperCube::Cube< 2 >::ElementNum< 0 >()][2*MAX_EDGES+1]; // One row per marching-squares index (2^4 corner configurations), each with room for 2*MAX_EDGES+1 ints
+		static int AddEdgeIndices( unsigned char mcIndex , int* edges);
+	};
+
+	///////////////////
+	// Cube::Element //
+	///////////////////
+	template< unsigned int D > template< unsigned int K > // Constructs the element directly from its index
+	Cube< D >::Element< K >::Element( unsigned int idx ) : index( idx ){}
+	template< unsigned int D > template< unsigned int K > // Constructs the element from its direction along the D-th axis and its co-index
+	Cube< D >::Element< K >::Element( Direction dir , unsigned int coIndex ){ _setElement( dir , coIndex ); }
+	template< unsigned int D > template< unsigned int K > template< unsigned int DK > // Lifts a K-dimensional element of a DK-dimensional sub-cube to the corresponding element of the D-dimensional cube
+	Cube< D >::Element< K >::Element( Element< DK > subCube , typename Cube< DK >::template Element< K > subElement )
+	{
+		static_assert( DK>=K , "[ERROR] Element::Element: sub-cube dimension cannot be smaller than the sub-element dimension" );
+		static_assert( DK<=D , "[ERROR] Element::Element: sub-cube dimension cannot be larger than the cube dimension" );
+		_setElement( subCube , subElement );
+	}
+	template< unsigned int D > template< unsigned int K > // Constructs the element from its direction along each of the D axes
+	Cube< D >::Element< K >::Element( const Direction dirs[D] ){ _setElement( dirs ); }
+
+	template< unsigned int D > template< unsigned int K > template< unsigned int KD > // General case: a proper sub-cube (D>KD) and a proper, non-corner sub-element (KD>K>0)
+	typename std::enable_if< (D>KD) && (KD>K) && K!=0 >::type Cube< D >::Element< K >::_setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement )
+	{
+		Direction dir ; unsigned int coIndex;
+		subCube.factor( dir , coIndex );
+		// If the sub-cube lies entirely in the back/front, we can compute the element in the smaller cube.
+		if( dir==BACK || dir==FRONT )
+		{
+			typename Cube< D-1 >::template Element< KD > _subCube( coIndex );
+			typename Cube< D-1 >::template Element< K > _element( _subCube , subElement );
+			*this = Element( dir , _element.index );
+		}
+		else
+		{
+			typename Cube< D-1 >::template Element< KD-1 > _subCube( coIndex ); // The sub-cube spans the D-th axis: its co-index names a (KD-1)-dimensional element of the (D-1)-dimensional cube
+
+			Direction _dir ; unsigned int _coIndex;
+			subElement.factor( _dir , _coIndex );
+			// If the sub-element lies entirely in the back/front, we can compute the element in the smaller cube.
+			if( _dir==BACK || _dir==FRONT )
+			{
+				typename Cube< KD-1 >::template Element< K > _subElement( _coIndex );
+				typename Cube< D-1 >::template Element< K > _element( _subCube , _subElement );
+				*this = Element( _dir , _element.index );
+			}
+			// Otherwise the sub-element spans the last axis of the sub-cube, so its co-index names a (K-1)-dimensional element one dimension down.
+			else
+			{
+				typename Cube< KD-1 >::template Element< K-1 > _subElement( _coIndex );
+				typename Cube< D-1 >::template Element< K-1 > _element( _subCube , _subElement );
+				*this = Element( _dir , _element.index );
+			}
+		}
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int KD > // Corner case (K==0) of a proper sub-cube (D>KD>0): same as the general case, minus the branch for a sub-element that spans an axis
+	typename std::enable_if< (D>KD) && (KD>K) && K==0 >::type Cube< D >::Element< K >::_setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement )
+	{
+		Direction dir ; unsigned int coIndex;
+		subCube.factor( dir , coIndex );
+		// If the sub-cube lies entirely in the back/front, we can compute the element in the smaller cube.
+		if( dir==BACK || dir==FRONT )
+		{
+			typename Cube< D-1 >::template Element< KD > _subCube( coIndex );
+			typename Cube< D-1 >::template Element< K > _element( _subCube , subElement );
+			*this = Element( dir , _element.index );
+		}
+		else
+		{
+			typename Cube< D-1 >::template Element< KD-1 > _subCube( coIndex ); // The sub-cube spans the D-th axis: its co-index names a (KD-1)-dimensional element of the (D-1)-dimensional cube
+
+			Direction _dir ; unsigned int _coIndex;
+			subElement.factor( _dir , _coIndex );
+			// If the sub-element lies entirely in the back/front, we can compute the element in the smaller cube.
+			if( _dir==BACK || _dir==FRONT ) // Expected to always hold here: factoring a corner of a cube with KD>0 only ever yields BACK or FRONT
+			{
+				typename Cube< KD-1 >::template Element< K > _subElement( _coIndex );
+				typename Cube< D-1 >::template Element< K > _element( _subCube , _subElement );
+				*this = Element( _dir , _element.index );
+			}
+		}
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int KD > // The sub-cube is the whole cube (D==KD): the sub-element is already expressed relative to it
+	typename std::enable_if< (D==KD) && (KD>K) >::type Cube< D >::Element< K >::_setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement ){ *this = subElement; }
+	template< unsigned int D > template< unsigned int K > template< unsigned int KD > // The sub-element has the dimension of the sub-cube (KD==K): the result is the sub-cube itself
+	typename std::enable_if< (KD==K) >::type Cube< D >::Element< K >::_setElement( typename Cube< D >::template Element< KD > subCube , typename Cube< KD >::template Element< K > subElement ){ *this = subCube; }
+
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // Index from (direction, co-index) for K>0: back-face elements first, then elements spanning the D-th axis, then front-face elements
+	typename std::enable_if< _D!=0 && _K!=0 >::type Cube< D >::Element< K >::_setElement( Direction dir , unsigned int coIndex )
+	{
+		switch( dir )
+		{
+			case BACK:  index = coIndex ; break;
+			case CROSS: index = coIndex + HyperCube::ElementNum< D-1 , K >::Value ; break; // Skip the K-dimensional elements of the back face
+			case FRONT: index = coIndex + HyperCube::ElementNum< D-1 , K >::Value + HyperCube::ElementNum< D-1 , K-1 >::Value ; break; // Skip the back-face elements and the elements spanning the D-th axis
+			default: ERROR_OUT( "Bad direction: %d" , dir );
+		}
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // Index from (direction, co-index) for corners (K==0): back-face corners first, then front-face corners
+	typename std::enable_if< _D!=0 && _K==0 >::type Cube< D >::Element< K >::_setElement( Direction dir , unsigned int coIndex )
+	{
+		switch( dir )
+		{
+			case BACK:  index = coIndex ; break;
+			case FRONT: index = coIndex + HyperCube::ElementNum< D-1 , K >::Value ; break;
+			default: ERROR_OUT( "Bad direction: %d" , dir ); // CROSS is not accepted for a corner and is reported through ERROR_OUT
+		}
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // 0-dimensional cube: the direction is ignored and the co-index is used as the index
+	typename std::enable_if< _D==0 && _K==0 >::type Cube< D >::Element< K >::_setElement( Direction dir , unsigned int coIndex ){ index = coIndex; }
+
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D > // Peels off the direction along the D-th axis and resolves the remaining directions in the (D-1)-dimensional cube
+	typename std::enable_if< _D!=0 >::type Cube< D >::Element< K>::_setElement( const Direction* dirs )
+	{
+		if( dirs[D-1]==CROSS ) *this = Element( dirs[D-1] , typename Cube< D-1 >::template Element< K-1 >( dirs ).index ); // NOTE(review): both branches are compiled for every (D,K); with K==0 this names Element< K-1 > with a wrapped-around K, which looks like it trips the D>=K static_assert on instantiation -- confirm
+		else                   *this = Element( dirs[D-1] , typename Cube< D-1 >::template Element< K >( dirs ).index ); // NOTE(review): likewise names a K-dimensional element of a (D-1)-dimensional cube when K==D -- confirm
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D > // Base case (D==0): there are no directions to read, and the only element of a 0-dimensional cube has index 0
+	typename std::enable_if< _D==0 >::type Cube< D >::Element< K>::_setElement( const Direction* dirs ){ index = 0; } // Assigned explicitly: the calling constructor does not initialize index, so an empty body would leave it indeterminate
+
+	template< unsigned int D > template< unsigned int  K > // Writes one character per axis ('B', 'C' or 'F') for the element's directions; no separator or newline is written
+	void Cube< D >::Element< K >::print( FILE* fp ) const
+	{
+		Direction dirs[D==0?1:D]; // At least one entry, so that D==0 does not declare a zero-length array
+		directions( dirs );
+		for( int d=0 ; d<D ; d++ ) fprintf( fp , "%c" , dirs[d]==BACK ? 'B' : ( dirs[d]==CROSS ? 'C' : 'F' ) );
+	}
+
+	template< unsigned int D > template< unsigned int K > // Splits the index into the direction along the D-th axis and the co-index within that group; the inverse of Element( dir , coIndex )
+	void Cube< D >::Element< K >::factor( Direction& dir , unsigned int& coIndex ) const { _factor( dir , coIndex ); }
+
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // K>0, K!=D: indices are laid out as [ back | crossing | front ]
+	typename std::enable_if< _D!=_K && _K!=0 >::type Cube< D >::Element< K >::_factor( Direction& dir , unsigned int& coIndex ) const
+	{
+		if     ( index<HyperCube::ElementNum< D-1 , K >::Value )                                             dir = BACK  , coIndex = index;
+		else if( index<HyperCube::ElementNum< D-1 , K >::Value + HyperCube::ElementNum< D-1 , K-1 >::Value ) dir = CROSS , coIndex = index - HyperCube::ElementNum< D-1 , K >::Value;
+		else                                                                                                 dir = FRONT , coIndex = index - HyperCube::ElementNum< D-1 , K >::Value - HyperCube::ElementNum< D-1 , K-1 >::Value;
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // Corners (K==0, D!=0): indices are laid out as [ back | front ], so CROSS is never produced
+	typename std::enable_if< _D!=_K && _K==0 >::type Cube< D >::Element< K >::_factor( Direction& dir , unsigned int& coIndex ) const
+	{
+		if     ( index<HyperCube::ElementNum< D-1 , K >::Value ) dir = BACK  , coIndex = index;
+		else                                                     dir = FRONT , coIndex = index - HyperCube::ElementNum< D-1 , K >::Value;
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // K==D: always reports CROSS with co-index 0, regardless of the stored index
+	typename std::enable_if< _D==_K >::type Cube< D >::Element< K >::_factor( Direction& dir , unsigned int& coIndex ) const { dir=CROSS , coIndex=0; }
+
+	template< unsigned int D > template< unsigned int K > // Returns only the direction computed by factor()
+	Direction Cube< D >::Element< K >::direction( void ) const
+	{
+		Direction dir ; unsigned int coIndex;
+		factor( dir , coIndex );
+		return dir;
+	}
+	template< unsigned int D > template< unsigned int K > // Returns only the co-index computed by factor()
+	unsigned int Cube< D >::Element< K >::coIndex( void ) const
+	{
+		Direction dir ; unsigned int coIndex;
+		factor( dir , coIndex );
+		return coIndex;
+	}
+
+	template< unsigned int D > template< unsigned int K > // Fills dirs[0..D-1] with the element's direction along every axis
+	void Cube< D >::Element< K >::directions( Direction* dirs ) const { _directions( dirs ); }
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // 0<K<D: record the direction along the D-th axis, then recurse on the element named by the co-index in the (D-1)-dimensional cube
+	typename std::enable_if< (_D>_K) && _K!=0 >::type Cube< D >::Element< K >::_directions( Direction* dirs ) const
+	{
+		unsigned int coIndex;
+		factor( dirs[D-1] , coIndex );
+		if( dirs[D-1]==CROSS ) typename Cube< D-1 >::template Element< K-1 >( coIndex ).directions( dirs ); // An element spanning the axis is the extrusion of a (K-1)-dimensional element
+		else                   typename Cube< D-1 >::template Element< K   >( coIndex ).directions( dirs );
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // Corners (K==0, D>0): record the direction along the D-th axis, then recurse on the corner of the (D-1)-dimensional cube
+	typename std::enable_if< (_D>_K) && _K==0 >::type Cube< D >::Element< K >::_directions( Direction* dirs ) const
+	{
+		unsigned int coIndex;
+		factor( dirs[D-1] , coIndex );
+		if( dirs[D-1]==FRONT || dirs[D-1]==BACK ) typename Cube< D-1 >::template Element< K >( coIndex ).directions( dirs ); // Expected to always hold: factoring a corner only ever yields BACK or FRONT
+	}
+
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // K==D: every one of the D entries is set to CROSS
+	typename std::enable_if< _D==_K >::type Cube< D >::Element< K >::_directions( Direction* dirs ) const { for( int d=0 ; d<D ; d++ ) dirs[d] = CROSS; }
+
+	template< unsigned int D > template< unsigned int K > // Returns the element obtained by swapping BACK and FRONT along every axis (CROSS directions are kept)
+	typename Cube< D >::template Element< K > Cube< D >::Element< K >::antipodal( void ) const { return _antipodal(); }
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // 0<K<D: flip the direction along the D-th axis and recurse on the element named by the co-index
+#ifdef _MSC_VER
+	typename std::enable_if< (_D>_K) && _K!=0 , typename Cube< D >::Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const
+#else // !_MSC_VER
+	typename std::enable_if< (_D>_K) && _K!=0 , typename Cube< D >::template Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const
+#endif // _MSC_VER
+	{
+		Direction dir ; unsigned int coIndex;
+		factor( dir , coIndex );
+		if     ( dir==CROSS ) return Element< K >( CROSS , typename Cube< D-1 >::template Element< K-1 >( coIndex ).antipodal().index );
+		else if( dir==FRONT ) return Element< K >( BACK  , typename Cube< D-1 >::template Element< K   >( coIndex ).antipodal().index );
+		else if( dir==BACK  ) return Element< K >( FRONT , typename Cube< D-1 >::template Element< K   >( coIndex ).antipodal().index );
+		return Element< K >(); // Fallback for a direction that is none of the three: the default element (index 0)
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // Corners (K==0, D>0): flip the direction along the D-th axis and recurse on the corner of the (D-1)-dimensional cube
+#ifdef _MSC_VER
+	typename std::enable_if< (_D>_K) && _K==0 , typename Cube< D >::Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const
+#else // !_MSC_VER
+	typename std::enable_if< (_D>_K) && _K==0 , typename Cube< D >::template Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const
+#endif // _MSC_VER
+	{
+		Direction dir ; unsigned int coIndex;
+		factor( dir , coIndex );
+		if     ( dir==FRONT ) return Element< K >( BACK  , typename Cube< D-1 >::template Element< K >( coIndex ).antipodal().index );
+		else if( dir==BACK  ) return Element< K >( FRONT , typename Cube< D-1 >::template Element< K >( coIndex ).antipodal().index );
+		return Element< K >(); // Fallback for a direction that is neither FRONT nor BACK: the default element (index 0)
+	}
+	template< unsigned int D > template< unsigned int K > template< unsigned int _D , unsigned int _K > // K==D: the element is returned unchanged
+#ifdef _MSC_VER
+	typename std::enable_if< _D==_K , typename Cube< D >::Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const { return *this; }
+#else // !_MSC_VER
+	typename std::enable_if< _D==_K , typename Cube< D >::template Element< K > >::type Cube< D >::Element< K >::_antipodal( void ) const { return *this; }
+#endif // _MSC_VER
+
+	//////////
+	// Cube //
+	//////////
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	bool Cube< D >::Overlap( Element< K1 > e1 , Element< K2 > e2 ){ return _Overlap( e1 , e2 ); }
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1>=K2) , bool >::type Cube< D >::_Overlap( Element< K1 > e1 , Element< K2 > e2 )
+	{
+		Direction dir1[ D ] , dir2[ D ];
+		e1.directions( dir1 ) , e2.directions( dir2 );
+		for( int d=0 ; d<D ; d++ ) if( dir1[d]!=CROSS && dir1[d]!=dir2[d] ) return false;
+		return true;
+	}
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1< K2) , bool >::type Cube< D >::_Overlap( Element< K1 > e1 , Element< K2 > e2 ){ return _Overlap( e2 , e1 ); }
+
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	void Cube< D >::OverlapElements( Element< K1 > e , Element< K2 >* es ){ _OverlapElements( e , es ); }
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1>=K2) >::type Cube< D >::_OverlapElements( Element< K1 > e , Element< K2 >* es )
+	{
+		for( typename Cube< K1 >::template Element< K2 > _e ; _e<Cube< K1 >::template ElementNum< K2 >() ; _e++ ) es[_e.index] = Element< K2 >( e , _e );
+	}
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1< K2) && D==K2 >::type Cube< D >::_OverlapElements( Element< K1 > e , Element< K2 >* es )
+	{
+		es[0] = Element< D >();
+	}
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1< K2) && D!=K2 && K1!=0 >::type Cube< D >::_OverlapElements( Element< K1 > e , Element< K2 >* es )
+	{
+		Direction dir = e.direction() ; unsigned int coIndex;
+		e.factor( dir , coIndex );
+		if( dir==FRONT || dir==BACK )
+		{
+			typename Cube< D-1 >::template Element< K2   > _es1[ HyperCube::OverlapElementNum< D-1 , K1 , K2   >::Value ];
+			typename Cube< D-1 >::template Element< K2-1 > _es2[ HyperCube::OverlapElementNum< D-1 , K1 , K2-1 >::Value ];
+			Cube< D-1 >::OverlapElements( typename Cube< D-1 >::template Element< K1 >( coIndex ) , _es1 );
+			Cube< D-1 >::OverlapElements( typename Cube< D-1 >::template Element< K1 >( coIndex ) , _es2 );
+			for( unsigned int i=0 ; i<HyperCube::OverlapElementNum< D-1 , K1 , K2   >::Value ; i++ ) es[i] = typename Cube< D >::template Element< K2 >( dir   , _es1[i].index );
+			es += HyperCube::OverlapElementNum< D-1 , K1 , K2 >::Value;
+			for( unsigned int i=0 ; i<HyperCube::OverlapElementNum< D-1 , K1 , K2-1 >::Value ; i++ ) es[i] = typename Cube< D >::template Element< K2 >( CROSS , _es2[i].index );
+		}
+		else if( dir==CROSS )
+		{
+			typename Cube< D-1 >::template Element< K2-1 > _es1[ HyperCube::OverlapElementNum< D-1 , K1-1 , K2-1 >::Value ];
+			Cube< D-1 >::OverlapElements( typename Cube< D-1 >::template Element< K1-1 >( coIndex ) , _es1 );
+			for( unsigned int i=0 ; i<HyperCube::OverlapElementNum< D-1 , K1-1 , K2-1 >::Value ; i++ ) es[i] = typename Cube< D >::template Element< K2 >( CROSS , _es1[i].index );
+		}
+	}
+	template< unsigned int D > template< unsigned int K1 , unsigned int K2 >
+	typename std::enable_if< (K1< K2) && D!=K2 && K1==0 >::type Cube< D >::_OverlapElements( Element< K1 > e , Element< K2 >* es )
+	{
+		Direction dir = e.direction() ; unsigned int coIndex;
+		e.factor( dir , coIndex );
+		if( dir==FRONT || dir==BACK )
+		{
+			typename Cube< D-1 >::template Element< K2   > _es1[ HyperCube::OverlapElementNum< D-1 , K1 , K2   >::Value ];
+			typename Cube< D-1 >::template Element< K2-1 > _es2[ HyperCube::OverlapElementNum< D-1 , K1 , K2-1 >::Value ];
+			Cube< D-1 >::OverlapElements( typename Cube< D-1 >::template Element< K1 >( coIndex ) , _es1 );
+			Cube< D-1 >::OverlapElements( typename Cube< D-1 >::template Element< K1 >( coIndex ) , _es2 );
+			for( unsigned int i=0 ; i<HyperCube::OverlapElementNum< D-1 , K1 , K2   >::Value ; i++ ) es[i] = typename Cube< D >::template Element< K2 >( dir   , _es1[i].index );
+			es += HyperCube::OverlapElementNum< D-1 , K1 , K2 >::Value;
+			for( unsigned int i=0 ; i<HyperCube::OverlapElementNum< D-1 , K1 , K2-1 >::Value ; i++ ) es[i] = typename Cube< D >::template Element< K2 >( CROSS , _es2[i].index );
+		}
+	}
+
+	template< unsigned int D > template< unsigned int K >
+	typename Cube< D >::template IncidentCubeIndex< K > Cube< D >::IncidentCube( Element< K > e ){ return _IncidentCube( e ); }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D==K , typename Cube< D >::IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e ){ return IncidentCubeIndex< D >(); }
+#else // !_MSC_VER
+	typename std::enable_if< _D==K , typename Cube< D >::template IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e ){ return IncidentCubeIndex< D >(); }
+#endif // _MSC_VER
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K!=0 , typename Cube< D >::IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e )
+#else // !_MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K!=0 , typename Cube< D >::template IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e )
+#endif // _MSC_VER
+	{
+		Direction dir ; unsigned int coIndex;
+		e.factor( dir , coIndex );
+		if     ( dir==CROSS ) return                                 Cube< D-1 >::IncidentCube( typename Cube< D-1 >::template Element< K-1 >( coIndex ) );
+		else if( dir==FRONT ) return IncidentCubeIndex< K >( BACK  , Cube< D-1 >::IncidentCube( typename Cube< D-1 >::template Element< K   >( coIndex ) ).index );
+		else                  return IncidentCubeIndex< K >( FRONT , Cube< D-1 >::IncidentCube( typename Cube< D-1 >::template Element< K   >( coIndex ) ).index );
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K==0 , typename Cube< D >::IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e )
+#else // !_MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K==0 , typename Cube< D >::template IncidentCubeIndex< K > >::type Cube< D >::_IncidentCube( Element< K > e )
+#endif // _MSC_VER
+	{
+		Direction dir ; unsigned int coIndex;
+		e.factor( dir , coIndex );
+		if( dir==FRONT ) return IncidentCubeIndex< K >( BACK  , Cube< D-1 >::IncidentCube( typename Cube< D-1 >::template Element< K >( coIndex ) ).index );
+		else             return IncidentCubeIndex< K >( FRONT , Cube< D-1 >::IncidentCube( typename Cube< D-1 >::template Element< K >( coIndex ) ).index );
+	}
+
+	template< unsigned int D >
+	bool Cube< D >::IsOriented( Element< D-1 > e ) // Reports the orientation flag of a (D-1)-dimensional element of the D-cube
+	{
+		unsigned int dim ; Direction dir; // both are filled in by _FactorOrientation below
+		_FactorOrientation( e , dim , dir );
+		return (dir==FRONT) ^ ((D-dim-1)&1); // true when exactly one holds: dir is FRONT, or (D-dim-1) is odd
+	}
+	template< unsigned int D > template< unsigned int _D >
+	typename std::enable_if< _D!=1 >::type Cube< D >::_FactorOrientation( Element< D-1 > e , unsigned int& dim , Direction& dir ) // Overload for _D!=1: factors e into dir plus a lower-dimensional index and sets dim
+	{
+		unsigned int coIndex; // index of the remaining element inside Cube< D-1 >
+		e.factor( dir , coIndex );
+		if( dir==CROSS ) Cube< D-1 >::_FactorOrientation( typename Cube< D-1 >::template Element< D-2 >( coIndex ) , dim , dir ); // CROSS: defer to the next lower dimension; no "template" keyword here because no template-argument list follows the name
+		else dim = D-1; // FRONT/BACK: the last axis is the one reported
+	}
+	template< unsigned int D > template< unsigned int _D >
+	typename std::enable_if< _D==1 >::type Cube< D >::_FactorOrientation( Element< D-1 > e , unsigned int& dim , Direction& dir )
+	{
+		unsigned int coIndex;
+		e.factor( dir , coIndex );
+		dim = 0;
+	}
+
+	template< unsigned int D > template< typename Real >
+	unsigned int Cube< D >::MCIndex( const Real values[ Cube< D >::ElementNum< 0 >() ] , Real iso ) // Builds a bit mask from the per-corner values compared against iso
+	{
+		unsigned int mcIdx = 0;
+		for( unsigned int c=0 ; c<ElementNum< 0 >() ; c++ ) if( values[c]<iso ) mcIdx |= (1<<c); // bit c is set when values[c] is strictly below iso
+		return mcIdx;
+	}
+
+	template< unsigned int D > template< unsigned int K >
+	unsigned int Cube< D >::ElementMCIndex( Element< K > element , unsigned int mcIndex ){ return _ElementMCIndex( element , mcIndex ); }
+
+	template< unsigned int D >
+	bool Cube< D >::HasMCRoots( unsigned int mcIndex ) // True when mcIndex is non-zero and at least one of its low 2^D bits is clear
+	{
+		static const unsigned int Mask = (1<<(1<<D)) - 1; // low 2^D bits set; NOTE(review): the shift count 1<<D reaches 32 once D>=5, which overflows a 32-bit int -- confirm D stays below that
+		return mcIndex!=0 && ( mcIndex & Mask )!=Mask;
+	}
+
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K!=0 , unsigned int >::type Cube< D >::_ElementMCIndex( Element< K > element , unsigned int mcIndex )
+	{
+		static const unsigned int Mask = ( 1<<( ElementNum< 0 >() / 2 ) ) - 1;
+		static const unsigned int Shift = ElementNum< 0 >() / 2 , _Shift = Cube< K >::template ElementNum< 0 >() / 2;
+		unsigned int mcIndex0 = mcIndex & Mask , mcIndex1 = ( mcIndex>>Shift ) & Mask;
+		Direction dir ; unsigned int coIndex;
+		element.factor( dir , coIndex );
+		if( dir==CROSS ) return Cube< D-1 >::template ElementMCIndex< K-1 >( coIndex , mcIndex0 ) | ( Cube< D-1 >::template ElementMCIndex< K-1 >( coIndex , mcIndex1 )<<_Shift );
+		else             return Cube< D-1 >::template ElementMCIndex< K   >( coIndex , dir==BACK ? mcIndex0 : mcIndex1 );
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K==0 , unsigned int >::type Cube< D >::_ElementMCIndex( Element< K > element , unsigned int mcIndex ) // Overload for K==0 and _D!=K: picks one half of mcIndex and recurses into Cube< D-1 >
+	{
+		static const unsigned int Mask = ( 1<<( ElementNum< 0 >() / 2 ) ) - 1; // selects the low half of the corner bits
+		static const unsigned int Shift = ElementNum< 0 >() / 2; // the unused _Shift local was dropped; this overload never recombines two halves
+		unsigned int mcIndex0 = mcIndex & Mask , mcIndex1 = ( mcIndex>>Shift ) & Mask; // low half and high half of mcIndex
+		Direction dir ; unsigned int coIndex;
+		element.factor( dir , coIndex );
+		return Cube< D-1 >::template ElementMCIndex< K >( typename Cube< D-1 >::template Element< K >( coIndex ) , dir==BACK ? mcIndex0 : mcIndex1 ); // BACK uses the low half, any other direction the high half
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D==K , unsigned int >::type Cube< D >::_ElementMCIndex( Element< K > element , unsigned int mcIndex ){ return mcIndex; }
+
+	template< unsigned int D > template< unsigned int K >
+	void Cube< D >::CellOffset( Element< K > e , IncidentCubeIndex< K > d , int x[D] ){ _CellOffset( e , d , x ); }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D==K >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d , int *x ){ for( unsigned int i=0 ; i<D ; i++ ) x[i] = 0; } // Overload for _D==K: zeroes all D entries of x; the counter is named i so it no longer shadows parameter d, and is unsigned to match D
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K!=0 >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d , int *x )
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex );
+		if     ( eDir==CROSS ){ x[D-1] =  0                          ; Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K-1 >( eCoIndex ) , d                                                                 , x ); }
+		else if( eDir==BACK  ){ x[D-1] = -1 + ( dDir==BACK ? 0 : 1 ) ; Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) , x ); }
+		else if( eDir==FRONT ){ x[D-1] =  0 + ( dDir==BACK ? 0 : 1 ) ; Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) , x ); }
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K==0 >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d , int *x )
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex );
+		if     ( eDir==BACK  ){ x[D-1] = -1 + ( dDir==BACK ? 0 : 1 ) ; Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) , x ); }
+		else if( eDir==FRONT ){ x[D-1] =  0 + ( dDir==BACK ? 0 : 1 ) ; Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) , x ); }
+	}
+
+	template< unsigned int D > template< unsigned int K >
+	unsigned int Cube< D >::CellOffset( Element< K > e , IncidentCubeIndex< K > d ){ return _CellOffset( e , d ); }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D==K && K==0 , unsigned int >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d ){ return 0; }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D==K && K!=0 , unsigned int >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d ){ return WindowIndex< IsotropicUIntPack< D , 3 > , IsotropicUIntPack< D , 1 > >::Index; }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K!=0 , unsigned int >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d ) // Overload for K!=0 and _D!=K: returns a base-3 style index, one digit (0..2) per axis, with the last axis as the lowest digit
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex ); // split both the element and the incident-cube index along the last axis
+		if     ( eDir==CROSS ){ return 1                          + Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K-1 >( eCoIndex ) , d                                                                 ) * 3; } // calls carry no "template" keyword: no template-argument list follows the name
+		else if( eDir==BACK  ){ return 0 + ( dDir==BACK ? 0 : 1 ) + Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ) * 3; }
+		else if( eDir==FRONT ){ return 1 + ( dDir==BACK ? 0 : 1 ) + Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ) * 3; }
+		return 0; // reached only when eDir is none of CROSS/BACK/FRONT
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+	typename std::enable_if< _D!=K && K==0 , unsigned int >::type Cube< D >::_CellOffset( Element< K > e , IncidentCubeIndex< K > d )
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex );
+		if     ( eDir==BACK  ){ return 0 + ( dDir==BACK ? 0 : 1 ) + Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ) * 3; }
+		else if( eDir==FRONT ){ return 1 + ( dDir==BACK ? 0 : 1 ) + Cube< D-1 >::CellOffset( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ) * 3; }
+		return 0;
+	}
+
+
+	template< unsigned int D > template< unsigned int K >
+	typename Cube< D >::template Element< K > Cube< D >::IncidentElement( Element< K > e , IncidentCubeIndex< K > d ){ return _IncidentElement( e , d ); }
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D==K , typename Cube< D >::Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d ){ return e; }
+#else // !_MSC_VER
+	typename std::enable_if< _D==K , typename Cube< D >::template Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d ){ return e; }
+#endif // _MSC_VER
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K!=0 , typename Cube< D >::Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d )
+#else // !_MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K!=0 , typename Cube< D >::template Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d )
+#endif // _MSC_VER
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex ); // split both the element and the incident-cube index along the last axis
+		if     ( eDir==CROSS ) return Element< K >(           eDir   , Cube< D-1 >::IncidentElement( typename Cube< D-1 >::template Element< K-1 >( eCoIndex ) , d                                                                 ).index ); // calls carry no "template" keyword: no template-argument list follows the name
+		else if( eDir==dDir  ) return Element< K >( Opposite( eDir ) , Cube< D-1 >::IncidentElement( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ).index ); // matching directions: the result uses the opposite direction on this axis
+		else                   return Element< K >(           eDir   , Cube< D-1 >::IncidentElement( typename Cube< D-1 >::template Element< K   >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ).index ); // differing directions: the direction on this axis is kept
+	}
+	template< unsigned int D > template< unsigned int K , unsigned int _D >
+#ifdef _MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K==0 , typename Cube< D >::Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d )
+#else // !_MSC_VER
+	typename std::enable_if< _D!=K && _D!=0 && K==0 , typename Cube< D >::template Element< K > >::type Cube< D >::_IncidentElement( Element< K > e , IncidentCubeIndex< K > d )
+#endif // _MSC_VER
+	{
+		Direction eDir , dDir ; unsigned int eCoIndex , dCoIndex;
+		e.factor( eDir , eCoIndex ) , d.factor( dDir , dCoIndex ); // split both the element and the incident-cube index along the last axis
+		if( eDir==dDir ) return Element< K >( Opposite( eDir ) , Cube< D-1 >::IncidentElement( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ).index ); // matching directions: opposite direction on this axis; no "template" keyword since no template-argument list follows the name
+		else             return Element< K >(           eDir   , Cube< D-1 >::IncidentElement( typename Cube< D-1 >::template Element< K >( eCoIndex ) , typename Cube< D-1 >::template IncidentCubeIndex< K >( dCoIndex ) ).index ); // differing directions: the direction on this axis is kept
+	}
+
+
+	/////////////////////
+	// MarchingSquares //
+	/////////////////////
+	const int MarchingSquares::edges[][MAX_EDGES*2+1] =
+	{
+		// Positive to the right
+		// Positive in center
+		/////////////////////////////////// (0,0) (1,0) (0,1) (1,1)
+		{ -1 ,  -1 ,  -1 ,  -1 ,  -1 } , //   -     -     -     -  //
+		{  1 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     -     -  // (0,0) - (0,1) | (0,0) - (1,0)
+		{  0 ,   2 ,  -1 ,  -1 ,  -1 } , //   -     +     -     -  // (0,0) - (1,0) | (1,0) - (1,1)
+		{  1 ,   2 ,  -1 ,  -1 ,  -1 } , //   +     +     -     -  // (0,0) - (0,1) | (1,0) - (1,1)
+		{  3 ,   1 ,  -1 ,  -1 ,  -1 } , //   -     -     +     -  // (0,1) - (1,1) | (0,0) - (0,1)
+		{  3 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     +     -  // (0,1) - (1,1) | (0,0) - (1,0)
+		{  0 ,   1 ,   3 ,   2 ,  -1 } , //   -     +     +     -  // (0,0) - (1,0) | (0,0) - (0,1) & (0,1) - (1,1) | (1,0) - (1,1)
+		{  3 ,   2 ,  -1 ,  -1 ,  -1 } , //   +     +     +     -  // (0,1) - (1,1) | (1,0) - (1,1)
+		{  2 ,   3 ,  -1 ,  -1 ,  -1 } , //   -     -     -     +  // (1,0) - (1,1) | (0,1) - (1,1)
+		{  1 ,   3 ,   2 ,   0 ,  -1 } , //   +     -     -     +  // (0,0) - (0,1) | (0,1) - (1,1) & (1,0) - (1,1) | (0,0) - (1,0)
+		{  0 ,   3 ,  -1 ,  -1 ,  -1 } , //   -     +     -     +  // (0,0) - (1,0) | (0,1) - (1,1)
+		{  1 ,   3 ,  -1 ,  -1 ,  -1 } , //   +     +     -     +  // (0,0) - (0,1) | (0,1) - (1,1)
+		{  2 ,   1 ,  -1 ,  -1 ,  -1 } , //   -     -     +     +  // (1,0) - (1,1) | (0,0) - (0,1)
+		{  2 ,   0 ,  -1 ,  -1 ,  -1 } , //   +     -     +     +  // (1,0) - (1,1) | (0,0) - (1,0)
+		{  0 ,   1 ,  -1 ,  -1 ,  -1 } , //   -     +     +     +  // (0,0) - (1,0) | (0,0) - (0,1)
+		{ -1 ,  -1 ,  -1 ,  -1 ,  -1 } , //   +     +     +     +  //
+	};
+	inline int MarchingSquares::AddEdgeIndices( unsigned char mcIndex , int* isoIndices ) // Copies the edge-index pairs for mcIndex from the edges table into isoIndices and returns how many pairs were written
+	{
+		int nEdges = 0;
+		/* Square is entirely in/out of the surface, or mcIndex lies beyond the 16 rows of the table */
+		if( mcIndex==0 || mcIndex>=15 ) return 0;
+
+		/* Create the edges: each row holds index pairs terminated by -1 */
+		for( int i=0 ; edges[mcIndex][i]!=-1 ; i+=2 )
+		{
+			for( int j=0 ; j<2 ; j++ ) isoIndices[i+j] = edges[mcIndex][i+j];
+			nEdges++;
+		}
+		return nEdges;
+	}
+}
+
 #endif //MARCHING_CUBES_INCLUDED
diff --git a/Src/MemoryUsage.h b/Src/MemoryUsage.h
deleted file mode 100644
index c6997b7..0000000
--- a/Src/MemoryUsage.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-#ifndef MEMORY_USAGE_INCLUDED
-#define MEMORY_USAGE_INCLUDED
-
-#if defined( _WIN32 ) || defined( _WIN64 )
-
-#include <Windows.h>
-struct MemoryInfo
-{
-	static size_t Usage( void )
-	{
-		HANDLE h = GetCurrentProcess();
-		PROCESS_MEMORY_COUNTERS pmc;
-		return GetProcessMemoryInfo( h , &pmc , sizeof(pmc) ) ? pmc.WorkingSetSize : 0;
-	}
-};
-
-#else // !_WIN32 && !_WIN64
-
-#ifndef __APPLE__               // Linux variants
-
-#include <sys/time.h>
-#include <sys/resource.h>
-
-class MemoryInfo
-{
- public:
-  static size_t Usage(void)
-  {
-		FILE* f = fopen("/proc/self/stat","rb");
-		
-		int d;
-		long ld;
-		unsigned long lu;
-		unsigned long long llu;
-		char s[1024];
-		char c;
-		
-		int pid;
-		unsigned long vm;
-
-		int n = fscanf(f, "%d %s %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu"
-			,&pid ,s ,&c ,&d ,&d ,&d ,&d ,&d ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&ld ,&ld ,&ld ,&ld ,&d ,&ld ,&llu ,&vm ,&ld ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&lu ,&d ,&d ,&lu ,&lu );
-
-		fclose(f);
-/*
-pid %d 
-comm %s 
-state %c 
-ppid %d 
-pgrp %d 
-session %d 
-tty_nr %d
-tpgid %d 
-flags %lu 
-minflt %lu 
-cminflt %lu 
-majflt %lu 
-cmajflt %lu 
-utime %lu 
-stime %lu 
-cutime %ld 
-cstime %ld 
-priority %ld 
-nice %ld 
-0 %ld 
-itrealvalue %ld 
-starttime %lu 
-vsize %lu 
-rss %ld 
-rlim %lu 
-startcode %lu 
-endcode %lu 
-startstack %lu 
-kstkesp %lu 
-kstkeip %lu 
-signal %lu 
-blocked %lu 
-sigignore %lu 
-sigcatch %lu 
-wchan %lu 
-nswap %lu 
-cnswap %lu 
-exit_signal %d 
-processor %d 
-rt_priority %lu (since kernel 2.5.19) 
-policy %lu (since kernel 2.5.19) 
-*/
-		return vm;
-	}
-
-};
-#else // __APPLE__: has no "/proc" pseudo-file system
-
-// Thanks to David O'Gwynn for providing this fix.
-// This comes from a post by Michael Knight:
-//
-// http://miknight.blogspot.com/2005/11/resident-set-size-in-mac-os-x.html
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#include <mach/task.h>
-#include <mach/mach_init.h>
-
-void getres(task_t task, unsigned long *rss, unsigned long *vs)
-{
-    struct task_basic_info t_info;
-    mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
-
-    task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);
-    *rss = t_info.resident_size;
-    *vs = t_info.virtual_size;
-}
-
-class MemoryInfo
-{
- public:
-  static size_t Usage(void)
-  {
-    unsigned long rss, vs, psize;
-    task_t task = MACH_PORT_NULL;
-
-    if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS)
-        abort();
-    getres(task, &rss, &vs);
-    return rss;
-  }
-
-};
-
-#endif // !__APPLE__  
-
-#endif // _WIN32 || _WIN64
-
-#endif // MEMORY_USAGE_INCLUDE
diff --git a/Src/MultiGridOctreeData.Evaluation.inl b/Src/MultiGridOctreeData.Evaluation.inl
deleted file mode 100644
index c965eff..0000000
--- a/Src/MultiGridOctreeData.Evaluation.inl
+++ /dev/null
@@ -1,1151 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType>
-void Octree< Real >::_Evaluator< FEMDegree , BType >::set( LocalDepth depth )
-{
-	static const int  LeftPointSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	BSplineEvaluationData< FEMDegree , BType >::SetEvaluator( evaluator , depth );
-	if( depth>0 ) BSplineEvaluationData< FEMDegree , BType >::SetChildEvaluator( childEvaluator , depth-1 );
-	int center = ( 1<<depth )>>1;
-
-	// First set the stencils for the current depth
-	for( int x=-LeftPointSupportRadius ; x<=RightPointSupportRadius ; x++ ) for( int y=-LeftPointSupportRadius ; y<=RightPointSupportRadius ; y++ ) for( int z=-LeftPointSupportRadius ; z<=RightPointSupportRadius ; z++ )
-	{
-		int fIdx[] = { center+x , center+y , center+z };
-
-		// The cell stencil
-		{
-			double vv[3] , dv[3];
-			for( int dd=0 ; dd<DIMENSION ; dd++ )
-			{
-				vv[dd] = evaluator.centerValue( fIdx[dd] , center , false );
-				dv[dd] = evaluator.centerValue( fIdx[dd] , center , true  );
-			}
-			cellStencil( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-			dCellStencil( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-		}
-
-		//// The face stencil
-		for( int f=0 ; f<Cube::FACES ; f++ )
-		{
-			int dir , off;
-			Cube::FactorFaceIndex( f , dir , off );
-			double vv[3] , dv[3];
-			switch( dir )
-			{
-			case 0:
-				vv[0] = evaluator.cornerValue( fIdx[0] , center+off , false );
-				vv[1] = evaluator.centerValue( fIdx[1] , center     , false );
-				vv[2] = evaluator.centerValue( fIdx[2] , center     , false );
-				dv[0] = evaluator.cornerValue( fIdx[0] , center+off , true  );
-				dv[1] = evaluator.centerValue( fIdx[1] , center     , true  );
-				dv[2] = evaluator.centerValue( fIdx[2] , center     , true  );
-				break;
-			case 1:
-				vv[0] = evaluator.centerValue( fIdx[0] , center     , false );
-				vv[1] = evaluator.cornerValue( fIdx[1] , center+off , false );
-				vv[2] = evaluator.centerValue( fIdx[2] , center     , false );
-				dv[0] = evaluator.centerValue( fIdx[0] , center     , true  );
-				dv[1] = evaluator.cornerValue( fIdx[1] , center+off , true  );
-				dv[2] = evaluator.centerValue( fIdx[2] , center     , true  );
-				break;
-			case 2:
-				vv[0] = evaluator.centerValue( fIdx[0] , center     , false );
-				vv[1] = evaluator.centerValue( fIdx[1] , center     , false );
-				vv[2] = evaluator.cornerValue( fIdx[2] , center+off , false );
-				dv[0] = evaluator.centerValue( fIdx[0] , center     , true  );
-				dv[1] = evaluator.centerValue( fIdx[1] , center     , true  );
-				dv[2] = evaluator.cornerValue( fIdx[2] , center+off , true  );
-				break;
-			}
-			faceStencil[f]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-			dFaceStencil[f]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-		}
-
-		//// The edge stencil
-		for( int e=0 ; e<Cube::EDGES ; e++ )
-		{
-			int orientation , i1 , i2;
-			Cube::FactorEdgeIndex( e , orientation , i1 , i2 );
-			double vv[3] , dv[3];
-			switch( orientation )
-			{
-			case 0:
-				vv[0] = evaluator.centerValue( fIdx[0] , center    , false );
-				vv[1] = evaluator.cornerValue( fIdx[1] , center+i1 , false );
-				vv[2] = evaluator.cornerValue( fIdx[2] , center+i2 , false );
-				dv[0] = evaluator.centerValue( fIdx[0] , center    , true  );
-				dv[1] = evaluator.cornerValue( fIdx[1] , center+i1 , true  );
-				dv[2] = evaluator.cornerValue( fIdx[2] , center+i2 , true  );
-				break;
-			case 1:
-				vv[0] = evaluator.cornerValue( fIdx[0] , center+i1 , false );
-				vv[1] = evaluator.centerValue( fIdx[1] , center    , false );
-				vv[2] = evaluator.cornerValue( fIdx[2] , center+i2 , false );
-				dv[0] = evaluator.cornerValue( fIdx[0] , center+i1 , true  );
-				dv[1] = evaluator.centerValue( fIdx[1] , center    , true  );
-				dv[2] = evaluator.cornerValue( fIdx[2] , center+i2 , true  );
-				break;
-			case 2:
-				vv[0] = evaluator.cornerValue( fIdx[0] , center+i1 , false );
-				vv[1] = evaluator.cornerValue( fIdx[1] , center+i2 , false );
-				vv[2] = evaluator.centerValue( fIdx[2] , center    , false );
-				dv[0] = evaluator.cornerValue( fIdx[0] , center+i1 , true  );
-				dv[1] = evaluator.cornerValue( fIdx[1] , center+i2 , true  );
-				dv[2] = evaluator.centerValue( fIdx[2] , center    , true  );
-				break;
-			}
-			edgeStencil[e]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-			dEdgeStencil[e]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-		}
-
-		//// The corner stencil
-		for( int c=0 ; c<Cube::CORNERS ; c++ )
-		{
-			int cx , cy  ,cz;
-			Cube::FactorCornerIndex( c , cx , cy , cz );
-			double vv[3] , dv[3];
-			vv[0] = evaluator.cornerValue( fIdx[0] , center+cx , false );
-			vv[1] = evaluator.cornerValue( fIdx[1] , center+cy , false );
-			vv[2] = evaluator.cornerValue( fIdx[2] , center+cz , false );
-			dv[0] = evaluator.cornerValue( fIdx[0] , center+cx , true  );
-			dv[1] = evaluator.cornerValue( fIdx[1] , center+cy , true  );
-			dv[2] = evaluator.cornerValue( fIdx[2] , center+cz , true  );
-			cornerStencil[c]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-			dCornerStencil[c]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-		}
-	}
-
-	// Now set the stencils for the parents
-	for( int child=0 ; child<CHILDREN ; child++ )
-	{
-		int childX , childY , childZ;
-		Cube::FactorCornerIndex( child , childX , childY , childZ );
-		for( int x=-LeftPointSupportRadius ; x<=RightPointSupportRadius ; x++ ) for( int y=-LeftPointSupportRadius ; y<=RightPointSupportRadius ; y++ ) for( int z=-LeftPointSupportRadius ; z<=RightPointSupportRadius ; z++ )
-		{
-			int fIdx[] = { center/2+x , center/2+y , center/2+z };
-
-			//// The cell stencil
-			{
-				double vv[3] , dv[3];
-				vv[0] = childEvaluator.centerValue( fIdx[0] , center+childX , false );
-				vv[1] = childEvaluator.centerValue( fIdx[1] , center+childY , false );
-				vv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ , false );
-				dv[0] = childEvaluator.centerValue( fIdx[0] , center+childX , true  );
-				dv[1] = childEvaluator.centerValue( fIdx[1] , center+childY , true  );
-				dv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ , true  );
-				cellStencils[child]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-				dCellStencils[child]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-			}
-
-			//// The face stencil
-			for( int f=0 ; f<Cube::FACES ; f++ )
-			{
-				int dir , off;
-				Cube::FactorFaceIndex( f , dir , off );
-				double vv[3] , dv[3];
-				switch( dir )
-				{
-				case 0:
-					vv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+off , false );
-					vv[1] = childEvaluator.centerValue( fIdx[1] , center+childY     , false );
-					vv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ     , false );
-					dv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+off , true  );
-					dv[1] = childEvaluator.centerValue( fIdx[1] , center+childY     , true  );
-					dv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ     , true  );
-					break;
-				case 1:
-					vv[0] = childEvaluator.centerValue( fIdx[0] , center+childX     , false );
-					vv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+off , false );
-					vv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ     , false );
-					dv[0] = childEvaluator.centerValue( fIdx[0] , center+childX     , true  );
-					dv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+off , true  );
-					dv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ     , true  );
-					break;
-				case 2:
-					vv[0] = childEvaluator.centerValue( fIdx[0] , center+childX     , false );
-					vv[1] = childEvaluator.centerValue( fIdx[1] , center+childY     , false );
-					vv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+off , false );
-					dv[0] = childEvaluator.centerValue( fIdx[0] , center+childX     , true  );
-					dv[1] = childEvaluator.centerValue( fIdx[1] , center+childY     , true  );
-					dv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+off , true  );
-					break;
-				}
-				faceStencils[child][f]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-				dFaceStencils[child][f]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-			}
-
-			//// The edge stencil
-			for( int e=0 ; e<Cube::EDGES ; e++ )
-			{
-				int orientation , i1 , i2;
-				Cube::FactorEdgeIndex( e , orientation , i1 , i2 );
-				double vv[3] , dv[3];
-				switch( orientation )
-				{
-				case 0:
-					vv[0] = childEvaluator.centerValue( fIdx[0] , center+childX    , false );
-					vv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+i1 , false );
-					vv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+i2 , false );
-					dv[0] = childEvaluator.centerValue( fIdx[0] , center+childX    , true  );
-					dv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+i1 , true  );
-					dv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+i2 , true  );
-					break;
-				case 1:
-					vv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+i1 , false );
-					vv[1] = childEvaluator.centerValue( fIdx[1] , center+childY    , false );
-					vv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+i2 , false );
-					dv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+i1 , true  );
-					dv[1] = childEvaluator.centerValue( fIdx[1] , center+childY    , true  );
-					dv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+i2 , true  );
-					break;
-				case 2:
-					vv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+i1 , false );
-					vv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+i2 , false );
-					vv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ    , false );
-					dv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+i1 , true  );
-					dv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+i2 , true  );
-					dv[2] = childEvaluator.centerValue( fIdx[2] , center+childZ    , true  );
-					break;
-				}
-				edgeStencils[child][e]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-				dEdgeStencils[child][e]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-			}
-
-			//// The corner stencil
-			for( int c=0 ; c<Cube::CORNERS ; c++ )
-			{
-				int cx , cy  ,cz;
-				Cube::FactorCornerIndex( c , cx , cy , cz );
-				double vv[3] , dv[3];
-				vv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+cx , false );
-				vv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+cy , false );
-				vv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+cz , false );
-				dv[0] = childEvaluator.cornerValue( fIdx[0] , center+childX+cx , true  );
-				dv[1] = childEvaluator.cornerValue( fIdx[1] , center+childY+cy , true  );
-				dv[2] = childEvaluator.cornerValue( fIdx[2] , center+childZ+cz , true  );
-				cornerStencils[child][c]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = vv[0] * vv[1] * vv[2];
-				dCornerStencils[child][c]( x+LeftPointSupportRadius , y+LeftPointSupportRadius , z+LeftPointSupportRadius ) = Point3D< double >( dv[0] * vv[1] * vv[2] , vv[0] * dv[1] * vv[2] , vv[0] * vv[1] * dv[2] );
-			}
-		}
-	}
-	if( _bsData ) delete _bsData;
-	_bsData = new BSplineData< FEMDegree , BType >( depth );
-}
-template< class Real >
-template< class V , int FEMDegree , BoundaryType BType >
-V Octree< Real >::_getValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , Point3D< Real > p , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	if( IsActiveNode( node->children ) ) fprintf( stderr , "[WARNING] getValue assumes leaf node\n" );
-	V value(0);
-
-	while( GetGhostFlag( node ) )
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-
-			if( _isValidFEMNode( _n ) )
-			{
-				int _pIdx[3];
-				Point3D< Real > _s ; Real _w;
-				_startAndWidth( _n , _s , _w );
-				int _fIdx[3];
-				functionIndex< FEMDegree , BType >( _n , _fIdx );
-				for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-				value += 
-					solution[ _n->nodeData.nodeIndex ] *
-					(Real)
-					(
-						evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) *
-						evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) *
-						evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-						);
-			}
-		}
-		node = node->parent;
-	}
-
-	LocalDepth d = _localDepth( node );
-
-	for( int dd=0 ; dd<3 ; dd++ )
-		if     ( p[dd]==0 ) p[dd] = (Real)(0.+1e-6);
-		else if( p[dd]==1 ) p[dd] = (Real)(1.-1e-6);
-
-		{
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-				if( _isValidFEMNode( _n ) )
-				{
-					int _pIdx[3];
-					Point3D< Real > _s ; Real _w;
-					_startAndWidth( _n , _s , _w );
-					int _fIdx[3];
-					functionIndex< FEMDegree , BType >( _n , _fIdx );
-					for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-					value += 
-						solution[ _n->nodeData.nodeIndex ] *
-						(Real)
-						(
-							evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) *
-							evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) *
-							evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-							);
-				}
-			}
-			if( d>0 )
-			{
-				const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-				for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-				{
-					const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-					if( _isValidFEMNode( _n ) )
-					{
-						int _pIdx[3];
-						Point3D< Real > _s ; Real _w;
-						_startAndWidth( _n , _s , _w );
-						int _fIdx[3];
-						functionIndex< FEMDegree , BType >( _n , _fIdx );
-						for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-						value += 
-							coarseSolution[ _n->nodeData.nodeIndex ] *
-							(Real)
-							(
-								evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) *
-								evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) *
-								evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-								);
-					}
-				}
-			}
-		}
-		return value;
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-std::pair< Real , Point3D< Real > > Octree< Real >::_getValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , Point3D< Real > p , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	if( IsActiveNode( node->children ) ) fprintf( stderr , "[WARNING] _getValueAndGradient assumes leaf node\n" );
-	Real value(0);
-	Point3D< Real > gradient;
-
-	while( GetGhostFlag( node ) )
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-
-			if( _isValidFEMNode( _n ) )
-			{
-				int _pIdx[3];
-				Point3D< Real > _s; Real _w;
-				_startAndWidth( _n , _s , _w );
-				int _fIdx[3];
-				functionIndex< FEMDegree , BType >( _n , _fIdx );
-				for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-				value += 
-					solution[ _n->nodeData.nodeIndex ] *
-					(Real)
-					(
-						evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-					);
-				gradient += 
-					Point3D< Real >
-					(
-						evaluator._bsData->dBaseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-						evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->dBaseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-						evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->dBaseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-					) * solution[ _n->nodeData.nodeIndex ];
-			}
-		}
-		node = node->parent;
-	}
-
-
-	LocalDepth d = _localDepth( node );
-
-	for( int dd=0 ; dd<3 ; dd++ )
-		if     ( p[dd]==0 ) p[dd] = (Real)(0.+1e-6);
-		else if( p[dd]==1 ) p[dd] = (Real)(1.-1e-6);
-
-		{
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-
-				if( _isValidFEMNode( _n ) )
-				{
-					int _pIdx[3];
-					Point3D< Real > _s ; Real _w;
-					_startAndWidth( _n , _s , _w );
-					int _fIdx[3];
-					functionIndex< FEMDegree , BType >( _n , _fIdx );
-					for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-					value += 
-						solution[ _n->nodeData.nodeIndex ] *
-						(Real)
-						(
-							evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-						);
-					gradient += 
-						Point3D< Real >
-						(
-							evaluator._bsData->dBaseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-							evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->dBaseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-							evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->dBaseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-						) * solution[ _n->nodeData.nodeIndex ];
-				}
-			}
-			if( d>0 )
-			{
-				const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-				for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-				{
-					const TreeOctNode* _n = neighbors.neighbors[i][j][k];
-
-					if( _isValidFEMNode( _n ) )
-					{
-						int _pIdx[3];
-						Point3D< Real > _s ; Real _w;
-						_startAndWidth( _n , _s , _w );
-						int _fIdx[3];
-						functionIndex< FEMDegree , BType >( _n , _fIdx );
-						for( int dd=0 ; dd<3 ; dd++ ) _pIdx[dd] = std::max< int >( 0 , std::min< int >( SupportSize-1 , LeftSupportRadius + (int)floor( ( p[dd]-_s[dd] ) / _w ) ) );
-						value += 
-							coarseSolution[ _n->nodeData.nodeIndex ] *
-							(Real)
-							(
-								evaluator._bsData->baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-							);
-						gradient += 
-							Point3D< Real >
-							(
-								evaluator._bsData->dBaseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-								evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData->dBaseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData-> baseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] ) ,
-								evaluator._bsData-> baseBSplines[ _fIdx[0] ][ _pIdx[0] ]( p[0] ) * evaluator._bsData-> baseBSplines[ _fIdx[1] ][ _pIdx[1] ]( p[1] ) * evaluator._bsData->dBaseBSplines[ _fIdx[2] ][ _pIdx[2] ]( p[2] )
-							) * coarseSolution[ _n->nodeData.nodeIndex ];
-					}
-				}
-			}
-		}
-		return std::pair< Real , Point3D< Real > >( value , gradient );
-}
-template< class Real >
-template< class V , int FEMDegree , BoundaryType BType >
-V Octree< Real >::_getCenterValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineEvaluationData< FEMDegree , BType >::SupportSize;
-	static const int  LeftPointSupportRadius =   BSplineEvaluationData< FEMDegree , BType >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineEvaluationData< FEMDegree , BType >::SupportStart;
-
-	if( IsActiveNode( node->children ) ) fprintf( stderr , "[WARNING] getCenterValue assumes leaf node\n" );
-	V value(0);
-	LocalDepth d = _localDepth( node );
-
-	if( isInterior )
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* n = neighbors.neighbors[i][j][k];
-			if( IsActiveNode( n ) ) value += solution[ n->nodeData.nodeIndex ] * Real( evaluator.cellStencil( i , j , k ) );
-		}
-		if( d>0 )
-		{
-			int _corner = int( node - node->parent->children );
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* n = neighbors.neighbors[i][j][k];
-				if( IsActiveNode( n ) ) value += coarseSolution[n->nodeData.nodeIndex] * Real( evaluator.cellStencils[_corner]( i , j , k ) );
-			}
-		}
-	}
-	else
-	{
-		LocalOffset cIdx;
-		_localDepthAndOffset( node , d , cIdx );
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* n = neighbors.neighbors[i][j][k];
-
-			if( _isValidFEMNode( n ) )
-			{
-				LocalDepth _d ; LocalOffset fIdx;
-				_localDepthAndOffset( n , _d , fIdx );
-				value +=
-					solution[ n->nodeData.nodeIndex ] *
-					Real(
-						evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false ) *
-						evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false ) *
-						evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false )
-					);
-			}
-		}
-		if( d>0 )
-		{
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* n = neighbors.neighbors[i][j][k];
-				if( _isValidFEMNode( n ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( n , _d , fIdx );
-					value +=
-						coarseSolution[ n->nodeData.nodeIndex ] *
-						Real(
-							evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false ) *
-							evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false ) *
-							evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false )
-						);
-				}
-			}
-		}
-	}
-	return value;
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-std::pair< Real , Point3D< Real > > Octree< Real >::_getCenterValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineEvaluationData< FEMDegree , BType >::SupportSize;
-	static const int  LeftPointSupportRadius =   BSplineEvaluationData< FEMDegree , BType >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineEvaluationData< FEMDegree , BType >::SupportStart;
-
-	if( IsActiveNode( node->children ) ) fprintf( stderr , "[WARNING] getCenterValueAndGradient assumes leaf node\n" );
-	Real value(0);
-	Point3D< Real > gradient;
-	LocalDepth d = _localDepth( node );
-
-	if( isInterior )
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* n = neighbors.neighbors[i][j][k];
-			if( IsActiveNode( n ) )
-			{
-				value    +=          Real  ( evaluator. cellStencil( i , j , k ) ) * solution[ n->nodeData.nodeIndex ];
-				gradient += Point3D< Real >( evaluator.dCellStencil( i , j , k ) ) * solution[ n->nodeData.nodeIndex ];
-			}
-		}
-		if( d>0 )
-		{
-			int _corner = int( node - node->parent->children );
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* n = neighbors.neighbors[i][j][k];
-				if( IsActiveNode( n ) )
-				{
-					value    +=          Real  ( evaluator. cellStencils[_corner]( i , j , k ) ) * coarseSolution[n->nodeData.nodeIndex];
-					gradient += Point3D< Real >( evaluator.dCellStencils[_corner]( i , j , k ) ) * coarseSolution[n->nodeData.nodeIndex];
-				}
-			}
-		}
-	}
-	else
-	{
-		LocalOffset cIdx;
-		_localDepthAndOffset( node , d , cIdx );
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-
-		for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			const TreeOctNode* n = neighbors.neighbors[i][j][k];
-
-			if( _isValidFEMNode( n ) )
-			{
-				LocalDepth _d ; LocalOffset fIdx;
-				_localDepthAndOffset( n , _d , fIdx );
-				value +=
-					Real
-					(
-						evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false )
-					) * solution[ n->nodeData.nodeIndex ];
-				gradient += 
-					Point3D< Real >
-					(
-						evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , true  ) * evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false ) ,
-						evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , true  ) * evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false ) ,
-						evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , true  )
-						) * solution[ n->nodeData.nodeIndex ];
-			}
-		}
-		if( d>0 )
-		{
-			const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-			for( int i=0 ; i<SupportSize ; i++ ) for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-			{
-				const TreeOctNode* n = neighbors.neighbors[i][j][k];
-				if( _isValidFEMNode( n ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( n , _d , fIdx );
-					value +=
-						Real
-						(
-							evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false )
-						) * coarseSolution[ n->nodeData.nodeIndex ];
-					gradient +=
-						Point3D< Real >
-						(
-							evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , true  ) * evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false ) ,
-							evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , true  ) * evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false ) ,
-							evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false ) * evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false ) * evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , true  )
-						) * coarseSolution[ n->nodeData.nodeIndex ];
-				}
-			}
-		}
-	}
-	return std::pair< Real , Point3D< Real > >( value , gradient );
-}
-template< class Real >
-template< class V , int FEMDegree , BoundaryType BType >
-V Octree< Real >::_getEdgeValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int edge , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineEvaluationData< FEMDegree , BType >::SupportSize;
-	static const int  LeftPointSupportRadius =  BSplineEvaluationData< FEMDegree , BType >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineEvaluationData< FEMDegree , BType >::SupportStart;
-	V value(0);
-	LocalDepth d ; LocalOffset cIdx;
-	_localDepthAndOffset( node , d , cIdx );
-	int startX = 0 , endX = SupportSize , startY = 0 , endY = SupportSize , startZ = 0 , endZ = SupportSize;
-	int orientation , i1 , i2;
-	Cube::FactorEdgeIndex( edge , orientation , i1 , i2 );
-	switch( orientation )
-	{
-	case 0:
-		cIdx[1] += i1 , cIdx[2] += i2;
-		if( i1 ) startY++ ; else endY--;
-		if( i2 ) startZ++ ; else endZ--;
-		break;
-	case 1:
-		cIdx[0] += i1 , cIdx[2] += i2;
-		if( i1 ) startX++ ; else endX--;
-		if( i2 ) startZ++ ; else endZ--;
-		break;
-	case 2:
-		cIdx[0] += i1 , cIdx[1] += i2;
-		if( i1 ) startX++ ; else endX--;
-		if( i2 ) startY++ ; else endY--;
-		break;
-	}
-
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , d );
-		for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-		{
-			const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-			if( _isValidFEMNode( _node ) )
-			{
-				if( isInterior ) value += solution[ _node->nodeData.nodeIndex ] * evaluator.edgeStencil[edge]( x , y , z );
-				else
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					switch( orientation )
-					{
-					case 0:
-						value +=
-							solution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					case 1:
-						value +=
-							solution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					case 2:
-						value +=
-							solution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					}
-				}
-			}
-		}
-	}
-	if( d>0 )
-	{
-		int _corner = int( node - node->parent->children );
-		int _cx , _cy , _cz;
-		Cube::FactorCornerIndex( _corner , _cx , _cy , _cz );
-		// If the corner/child indices don't match, then the sample position is in the interior of the
-		// coarser cell and so the full support resolution should be used.
-		switch( orientation )
-		{
-		case 0:
-			if( _cy!=i1 ) startY = 0 , endY = SupportSize;
-			if( _cz!=i2 ) startZ = 0 , endZ = SupportSize;
-			break;
-		case 1:
-			if( _cx!=i1 ) startX = 0 , endX = SupportSize;
-			if( _cz!=i2 ) startZ = 0 , endZ = SupportSize;
-			break;
-		case 2:
-			if( _cx!=i1 ) startX = 0 , endX = SupportSize;
-			if( _cy!=i2 ) startY = 0 , endY = SupportSize;
-			break;
-		}
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-		for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-		{
-			const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-			if( _isValidFEMNode( _node ) )
-			{
-				if( isInterior ) value += coarseSolution[ _node->nodeData.nodeIndex ] * evaluator.edgeStencils[_corner][edge]( x , y , z );
-				else
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					switch( orientation )
-					{
-					case 0:
-						value +=
-							coarseSolution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					case 1:
-						value +=
-							coarseSolution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					case 2:
-						value +=
-							coarseSolution[ _node->nodeData.nodeIndex ] *
-							Real(
-								evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-								evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-								evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false )
-							);
-						break;
-					}
-				}
-			}
-		}
-	}
-	return Real( value );
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-std::pair< Real , Point3D< Real > > Octree< Real >::_getEdgeValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int edge , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineEvaluationData< FEMDegree , BType >::SupportSize;
-	static const int  LeftPointSupportRadius =  BSplineEvaluationData< FEMDegree , BType >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineEvaluationData< FEMDegree , BType >::SupportStart;
-	double value = 0;
-	Point3D< double > gradient;
-	LocalDepth d ; LocalOffset cIdx;
-	_localDepthAndOffset( node , d , cIdx );
-
-	int startX = 0 , endX = SupportSize , startY = 0 , endY = SupportSize , startZ = 0 , endZ = SupportSize;
-	int orientation , i1 , i2;
-	Cube::FactorEdgeIndex( edge , orientation , i1 , i2 );
-	switch( orientation )
-	{
-	case 0:
-		cIdx[1] += i1 , cIdx[2] += i2;
-		if( i1 ) startY++ ; else endY--;
-		if( i2 ) startZ++ ; else endZ--;
-		break;
-	case 1:
-		cIdx[0] += i1 , cIdx[2] += i2;
-		if( i1 ) startX++ ; else endX--;
-		if( i2 ) startZ++ ; else endZ--;
-		break;
-	case 2:
-		cIdx[0] += i1 , cIdx[1] += i2;
-		if( i1 ) startX++ ; else endX--;
-		if( i2 ) startY++ ; else endY--;
-		break;
-	}
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-		for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-		{
-			const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-			if( _isValidFEMNode( _node ) )
-			{
-				if( isInterior )
-				{
-					value    += evaluator. edgeStencil[edge]( x , y , z ) * solution[ _node->nodeData.nodeIndex ];
-					gradient += evaluator.dEdgeStencil[edge]( x , y , z ) * solution[ _node->nodeData.nodeIndex ];
-				}
-				else
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-
-					double vv[3] , dv[3];
-					switch( orientation )
-					{
-					case 0:
-						vv[0] = evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.evaluator.centerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					case 1:
-						vv[0] = evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.evaluator.centerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					case 2:
-						vv[0] = evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.evaluator.centerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					}
-					value += solution[ _node->nodeData.nodeIndex ] * vv[0] * vv[1] * vv[2];
-					gradient += Point3D< double >( dv[0]*vv[1]*vv[2] , vv[0]*dv[1]*vv[2] , vv[0]*vv[1]*dv[2] ) * solution[ _node->nodeData.nodeIndex ];
-				}
-			}
-		}
-	}
-	if( d>0 )
-	{
-		int _corner = int( node - node->parent->children );
-		int _cx , _cy , _cz;
-		Cube::FactorCornerIndex( _corner , _cx , _cy , _cz );
-		// If the corner/child indices don't match, then the sample position is in the interior of the
-		// coarser cell and so the full support resolution should be used.
-		switch( orientation )
-		{
-		case 0:
-			if( _cy!=i1 ) startY = 0 , endY = SupportSize;
-			if( _cz!=i2 ) startZ = 0 , endZ = SupportSize;
-			break;
-		case 1:
-			if( _cx!=i1 ) startX = 0 , endX = SupportSize;
-			if( _cz!=i2 ) startZ = 0 , endZ = SupportSize;
-			break;
-		case 2:
-			if( _cx!=i1 ) startX = 0 , endX = SupportSize;
-			if( _cy!=i2 ) startY = 0 , endY = SupportSize;
-			break;
-		}
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-		for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-		{
-			const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-			if( _isValidFEMNode( _node ) )
-			{
-				if( isInterior )
-				{
-					value    += evaluator. edgeStencils[_corner][edge]( x , y , z ) * coarseSolution[ _node->nodeData.nodeIndex ];
-					gradient += evaluator.dEdgeStencils[_corner][edge]( x , y , z ) * coarseSolution[ _node->nodeData.nodeIndex ];
-				}
-				else
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					double vv[3] , dv[3];
-					switch( orientation )
-					{
-					case 0:
-						vv[0] = evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.childEvaluator.centerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					case 1:
-						vv[0] = evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.childEvaluator.centerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					case 2:
-						vv[0] = evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false );
-						vv[1] = evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false );
-						vv[2] = evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , false );
-						dv[0] = evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , true  );
-						dv[1] = evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , true  );
-						dv[2] = evaluator.childEvaluator.centerValue( fIdx[2] , cIdx[2] , true  );
-						break;
-					}
-					value += coarseSolution[ _node->nodeData.nodeIndex ] * vv[0] * vv[1] * vv[2];
-					gradient += Point3D< double >( dv[0]*vv[1]*vv[2] , vv[0]*dv[1]*vv[2] , vv[0]*vv[1]*dv[2] ) * coarseSolution[ _node->nodeData.nodeIndex ];
-				}
-			}
-		}
-	}
-	return std::pair< Real , Point3D< Real > >( Real( value ) , Point3D< Real >( gradient ) );
-}
-
-template< class Real >
-template< class V , int FEMDegree , BoundaryType BType >
-V Octree< Real >::_getCornerValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int corner , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	V value(0);
-	LocalDepth d ; LocalOffset cIdx;
-	_localDepthAndOffset( node , d , cIdx );
-
-	int cx , cy , cz;
-	int startX = 0 , endX = SupportSize , startY = 0 , endY = SupportSize , startZ = 0 , endZ = SupportSize;
-	Cube::FactorCornerIndex( corner , cx , cy , cz );
-	cIdx[0] += cx , cIdx[1] += cy , cIdx[2] += cz;
-	{
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-		if( cx==0 ) endX--;
-		else      startX++;
-		if( cy==0 ) endY--;
-		else      startY++;
-		if( cz==0 ) endZ--;
-		else      startZ++;
-		if( isInterior )
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node=neighbors.neighbors[x][y][z];
-				if( IsActiveNode( _node ) ) value += solution[ _node->nodeData.nodeIndex ] * Real( evaluator.cornerStencil[corner]( x , y , z ) );
-			}
-		else
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-				if( _isValidFEMNode( _node ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					value +=
-						solution[ _node->nodeData.nodeIndex ] *
-						Real(
-							evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-							evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-							evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-						);
-				}
-			}
-	}
-	if( d>0 )
-	{
-		int _corner = int( node - node->parent->children );
-		int _cx , _cy , _cz;
-		Cube::FactorCornerIndex( _corner , _cx , _cy , _cz );
-		// If the corner/child indices don't match, then the sample position is in the interior of the
-		// coarser cell and so the full support resolution should be used.
-		if( cx!=_cx ) startX = 0 , endX = SupportSize;
-		if( cy!=_cy ) startY = 0 , endY = SupportSize;
-		if( cz!=_cz ) startZ = 0 , endZ = SupportSize;
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-		if( isInterior )
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node=neighbors.neighbors[x][y][z];
-				if( IsActiveNode( _node ) ) value += coarseSolution[ _node->nodeData.nodeIndex ] * Real( evaluator.cornerStencils[_corner][corner]( x , y , z ) );
-			}
-		else
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-				if( _isValidFEMNode( _node ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					value +=
-						coarseSolution[ _node->nodeData.nodeIndex ] *
-						Real(
-							evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false ) *
-							evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false ) *
-							evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false )
-						);
-				}
-			}
-	}
-	return Real( value );
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-std::pair< Real , Point3D< Real > > Octree< Real >::_getCornerValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int corner , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	double value = 0;
-	Point3D< double > gradient;
-	LocalDepth d ; LocalOffset cIdx;
-	_localDepthAndOffset( node , d , cIdx );
-
-	int cx , cy , cz;
-	int startX = 0 , endX = SupportSize , startY = 0 , endY = SupportSize , startZ = 0 , endZ = SupportSize;
-	Cube::FactorCornerIndex( corner , cx , cy , cz );
-	cIdx[0] += cx , cIdx[1] += cy , cIdx[2] += cz;
-	{
-		if( cx==0 ) endX--;
-		else      startX++;
-		if( cy==0 ) endY--;
-		else      startY++;
-		if( cz==0 ) endZ--;
-		else      startZ++;
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node );
-		if( isInterior )
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node=neighbors.neighbors[x][y][z];
-				if( IsActiveNode( _node ) ) value += solution[ _node->nodeData.nodeIndex ] * evaluator.cornerStencil[corner]( x , y , z ) , gradient += evaluator.dCornerStencil[corner]( x , y , z ) * solution[ _node->nodeData.nodeIndex ];
-			}
-		else
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-				if( _isValidFEMNode( _node ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					double v [] = { evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , false ) , evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , false ) , evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , false ) };
-					double dv[] = { evaluator.evaluator.cornerValue( fIdx[0] , cIdx[0] , true  ) , evaluator.evaluator.cornerValue( fIdx[1] , cIdx[1] , true  ) , evaluator.evaluator.cornerValue( fIdx[2] , cIdx[2] , true  ) };
-					value += solution[ _node->nodeData.nodeIndex ] * v[0] * v[1] * v[2];
-					gradient += Point3D< double >( dv[0]*v[1]*v[2] , v[0]*dv[1]*v[2] , v[0]*v[1]*dv[2] ) * solution[ _node->nodeData.nodeIndex ];
-				}
-			}
-	}
-	if( d>0 )
-	{
-		int _corner = int( node - node->parent->children );
-		int _cx , _cy , _cz;
-		Cube::FactorCornerIndex( _corner , _cx , _cy , _cz );
-		if( cx!=_cx ) startX = 0 , endX = SupportSize;
-		if( cy!=_cy ) startY = 0 , endY = SupportSize;
-		if( cz!=_cz ) startZ = 0 , endZ = SupportSize;
-		const typename TreeOctNode::ConstNeighbors< SupportSize >& neighbors = _neighbors< LeftPointSupportRadius , RightPointSupportRadius >( neighborKey , node->parent );
-		if( isInterior )
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node=neighbors.neighbors[x][y][z];
-				if( IsActiveNode( _node ) ) value += coarseSolution[ _node->nodeData.nodeIndex ] * evaluator.cornerStencils[_corner][corner]( x , y , z ) , gradient += evaluator.dCornerStencils[_corner][corner]( x , y , z ) * coarseSolution[ _node->nodeData.nodeIndex ];
-			}
-		else
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-				if( _isValidFEMNode( _node ) )
-				{
-					LocalDepth _d ; LocalOffset fIdx;
-					_localDepthAndOffset( _node , _d , fIdx );
-					double v [] = { evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , false ) , evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , false ) , evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , false ) };
-					double dv[] = { evaluator.childEvaluator.cornerValue( fIdx[0] , cIdx[0] , true  ) , evaluator.childEvaluator.cornerValue( fIdx[1] , cIdx[1] , true  ) , evaluator.childEvaluator.cornerValue( fIdx[2] , cIdx[2] , true  ) };
-					value += coarseSolution[ _node->nodeData.nodeIndex ] * v[0] * v[1] * v[2];
-					gradient += Point3D< double >( dv[0]*v[1]*v[2] , v[0]*dv[1]*v[2] , v[0]*v[1]*dv[2] ) * coarseSolution[ _node->nodeData.nodeIndex ];
-				}
-			}
-	}
-	return std::pair< Real , Point3D< Real > >( Real( value ) , Point3D< Real >( gradient ) );
-}
-template< class Real >
-template< int Degree , BoundaryType BType >
-Octree< Real >::MultiThreadedEvaluator< Degree , BType >::MultiThreadedEvaluator( const Octree< Real >* tree , const DenseNodeData< Real , Degree >& coefficients , int threads ) : _coefficients( coefficients ) , _tree( tree )
-{
-	_threads = std::max< int >( 1 , threads );
-	_neighborKeys.resize( _threads );
-	_coarseCoefficients = _tree->template coarseCoefficients< Real , Degree , BType >( _coefficients );
-	_evaluator.set( _tree->_maxDepth );
-	for( int t=0 ; t<_threads ; t++ ) _neighborKeys[t].set( tree->_localToGlobal( _tree->_maxDepth ) );
-}
-template< class Real >
-template< int Degree , BoundaryType BType >
-Real Octree< Real >::MultiThreadedEvaluator< Degree , BType >::value( Point3D< Real > p , int thread , const TreeOctNode* node )
-{
-	if( !node ) node = _tree->leaf( p );
-	ConstPointSupportKey< Degree >& nKey = _neighborKeys[thread];
-	nKey.getNeighbors( node );
-	return _tree->template _getValue< Real , Degree >( nKey , node , p , _coefficients , _coarseCoefficients , _evaluator );
-}
-template< class Real >
-template< int Degree , BoundaryType BType >
-std::pair< Real , Point3D< Real > > Octree< Real >::MultiThreadedEvaluator< Degree , BType >::valueAndGradient( Point3D< Real > p , int thread , const TreeOctNode* node )
-{
-	if( !node ) node = _tree->leaf( p );
-	ConstPointSupportKey< Degree >& nKey = _neighborKeys[thread];
-	nKey.getNeighbors( node );
-	return _tree->template _getValueAndGradient< Degree >( nKey , node , p , _coefficients , _coarseCoefficients , _evaluator );
-}
diff --git a/Src/MultiGridOctreeData.IsoSurface.inl b/Src/MultiGridOctreeData.IsoSurface.inl
deleted file mode 100644
index 2be26dc..0000000
--- a/Src/MultiGridOctreeData.IsoSurface.inl
+++ /dev/null
@@ -1,1106 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-#include "Octree.h"
-#include "MyTime.h"
-#include "MemoryUsage.h"
-#include "MAT.h"
-
-template< class Real >
-template< class Vertex >
-Octree< Real >::_SliceValues< Vertex >::_SliceValues( void )
-{
-	_oldCCount = _oldECount = _oldFCount = _oldNCount = 0;
-	cornerValues = NullPointer( Real ) ; cornerGradients = NullPointer( Point3D< Real > ) ; cornerSet = NullPointer( char );
-	edgeKeys = NullPointer( long long ) ; edgeSet = NullPointer( char );
-	faceEdges = NullPointer( _FaceEdges ) ; faceSet = NullPointer( char );
-	mcIndices = NullPointer( char );
-}
-template< class Real >
-template< class Vertex >
-Octree< Real >::_SliceValues< Vertex >::~_SliceValues( void )
-{
-	_oldCCount = _oldECount = _oldFCount = _oldNCount = 0;
-	FreePointer( cornerValues ) ; FreePointer( cornerGradients ) ; FreePointer( cornerSet );
-	FreePointer( edgeKeys ) ; FreePointer( edgeSet );
-	FreePointer( faceEdges ) ; FreePointer( faceSet );
-	FreePointer( mcIndices );
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_SliceValues< Vertex >::reset( bool nonLinearFit )
-{
-	faceEdgeMap.clear() , edgeVertexMap.clear() , vertexPairMap.clear();
-
-	if( _oldNCount<sliceData.nodeCount )
-	{
-		_oldNCount = sliceData.nodeCount;
-		FreePointer( mcIndices );
-		if( sliceData.nodeCount>0 ) mcIndices = AllocPointer< char >( _oldNCount );
-	}
-	if( _oldCCount<sliceData.cCount )
-	{
-		_oldCCount = sliceData.cCount;
-		FreePointer( cornerValues ) ; FreePointer( cornerGradients ) ; FreePointer( cornerSet );
-		if( sliceData.cCount>0 )
-		{
-			cornerValues = AllocPointer< Real >( _oldCCount );
-			if( nonLinearFit ) cornerGradients = AllocPointer< Point3D< Real > >( _oldCCount );
-			cornerSet = AllocPointer< char >( _oldCCount );
-		}
-	}
-	if( _oldECount<sliceData.eCount )
-	{
-		_oldECount = sliceData.eCount;
-		FreePointer( edgeKeys ) ; FreePointer( edgeSet );
-		edgeKeys = AllocPointer< long long >( _oldECount );
-		edgeSet = AllocPointer< char >( _oldECount );
-	}
-	if( _oldFCount<sliceData.fCount )
-	{
-		_oldFCount = sliceData.fCount;
-		FreePointer( faceEdges ) ; FreePointer( faceSet );
-		faceEdges = AllocPointer< _FaceEdges >( _oldFCount );
-		faceSet = AllocPointer< char >( _oldFCount );
-	}
-	
-	if( sliceData.cCount>0 ) memset( cornerSet , 0 , sizeof( char ) * sliceData.cCount );
-	if( sliceData.eCount>0 ) memset(   edgeSet , 0 , sizeof( char ) * sliceData.eCount );
-	if( sliceData.fCount>0 ) memset(   faceSet , 0 , sizeof( char ) * sliceData.fCount );
-}
-template< class Real >
-template< class Vertex >
-Octree< Real >::_XSliceValues< Vertex >::_XSliceValues( void )
-{
-	_oldECount = _oldFCount = 0;
-	edgeKeys = NullPointer( long long ) ; edgeSet = NullPointer( char );
-	faceEdges = NullPointer( _FaceEdges ) ; faceSet = NullPointer( char );
-}
-template< class Real >
-template< class Vertex >
-Octree< Real >::_XSliceValues< Vertex >::~_XSliceValues( void )
-{
-	_oldECount = _oldFCount = 0;
-	FreePointer( edgeKeys ) ; FreePointer( edgeSet );
-	FreePointer( faceEdges ) ; FreePointer( faceSet );
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_XSliceValues< Vertex >::reset( void )
-{
-	faceEdgeMap.clear() , edgeVertexMap.clear() , vertexPairMap.clear();
-
-	if( _oldECount<xSliceData.eCount )
-	{
-		_oldECount = xSliceData.eCount;
-		FreePointer( edgeKeys ) ; FreePointer( edgeSet );
-		edgeKeys = AllocPointer< long long >( _oldECount );
-		edgeSet = AllocPointer< char >( _oldECount );
-	}
-	if( _oldFCount<xSliceData.fCount )
-	{
-		_oldFCount = xSliceData.fCount;
-		FreePointer( faceEdges ) ; FreePointer( faceSet );
-		faceEdges = AllocPointer< _FaceEdges >( _oldFCount );
-		faceSet = AllocPointer< char >( _oldFCount );
-	}
-	if( xSliceData.eCount>0 ) memset( edgeSet , 0 , sizeof( char ) * xSliceData.eCount );
-	if( xSliceData.fCount>0 ) memset( faceSet , 0 , sizeof( char ) * xSliceData.fCount );
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , int WeightDegree , int ColorDegree , class Vertex >
-void Octree< Real >::getMCIsoSurface( const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , const DenseNodeData< Real , FEMDegree >& solution , Real isoValue , CoredMeshData< Vertex >& mesh , bool nonLinearFit , bool addBarycenter , bool polygonMesh )
-{
-	if( FEMDegree==1 && nonLinearFit ) fprintf( stderr , "[WARNING] First order B-Splines do not support non-linear interpolation\n" ) , nonLinearFit = false;
-
-	BSplineData< ColorDegree , BOUNDARY_NEUMANN >* colorBSData = NULL;
-	if( colorData ) colorBSData = new BSplineData< ColorDegree , BOUNDARY_NEUMANN >( _maxDepth );
-	DenseNodeData< Real , FEMDegree > coarseSolution( _sNodesEnd(_maxDepth-1) );
-	memset( &coarseSolution[0] , 0 , sizeof(Real)*_sNodesEnd( _maxDepth-1) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth-1) ; i++ ) coarseSolution[i] = solution[i];
-	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample< Real , FEMDegree , BType >( d , coarseSolution );
-	memoryUsage();
-
-	std::vector< _Evaluator< FEMDegree , BType > > evaluators( _maxDepth+1 );
-	for( LocalDepth d=0 ; d<=_maxDepth ; d++ ) evaluators[d].set( d );
-
-	int vertexOffset = 0;
-
-	std::vector< _SlabValues< Vertex > > slabValues( _maxDepth+1 );
-
-	// Initialize the back slice
-	for( LocalDepth d=_maxDepth ; d>=0 ; d-- )
-	{
-		_sNodes.setSliceTableData ( slabValues[d]. sliceValues(0). sliceData , _localToGlobal( d ) , 0 + _localInset( d ) , threads );
-		_sNodes.setSliceTableData ( slabValues[d]. sliceValues(1). sliceData , _localToGlobal( d ) , 1 + _localInset( d ) , threads );
-		_sNodes.setXSliceTableData( slabValues[d].xSliceValues(0).xSliceData , _localToGlobal( d ) , 0 + _localInset( d ) , threads );
-		slabValues[d].sliceValues (0).reset( nonLinearFit );
-		slabValues[d].sliceValues (1).reset( nonLinearFit );
-		slabValues[d].xSliceValues(0).reset( );
-	}
-	for( LocalDepth d=_maxDepth ; d>=0 ; d-- )
-	{
-		// Copy edges from finer
-		if( d<_maxDepth ) _copyFinerSliceIsoEdgeKeys( d , 0 , slabValues , threads );
-		_setSliceIsoCorners( solution , coarseSolution , isoValue , d , 0 , slabValues , evaluators[d] , threads );
-		_setSliceIsoVertices< WeightDegree , ColorDegree >( colorBSData , densityWeights , colorData , isoValue , d , 0 , vertexOffset , mesh , slabValues , threads );
-		_setSliceIsoEdges( d , 0 , slabValues , threads );
-	}
-
-	// Iterate over the slices at the finest level
-	for( int slice=0 ; slice<( 1<<_maxDepth ) ; slice++ )
-	{
-		// Process at all depths that contain this slice
-		LocalDepth d ; int o;
-		for( d=_maxDepth , o=slice+1 ; d>=0 ; d-- , o>>=1 )
-		{
-			// Copy edges from finer (required to ensure we correctly track edge cancellations)
-			if( d<_maxDepth )
-			{
-				_copyFinerSliceIsoEdgeKeys( d , o , slabValues , threads );
-				_copyFinerXSliceIsoEdgeKeys( d , o-1 , slabValues , threads );
-			}
-
-			// Set the slice values/vertices
-			_setSliceIsoCorners( solution , coarseSolution , isoValue , d , o , slabValues , evaluators[d] , threads );
-			_setSliceIsoVertices< WeightDegree , ColorDegree >( colorBSData , densityWeights , colorData , isoValue , d , o , vertexOffset , mesh , slabValues , threads );
-			_setSliceIsoEdges( d , o , slabValues , threads );
-
-			// Set the cross-slice edges
-			_setXSliceIsoVertices< WeightDegree , ColorDegree >( colorBSData , densityWeights , colorData , isoValue , d , o-1 , vertexOffset , mesh , slabValues , threads );
-			_setXSliceIsoEdges( d , o-1 , slabValues , threads );
-
-			// Add the triangles
-			_setIsoSurface( d , o-1 , slabValues[d].sliceValues(o-1) , slabValues[d].sliceValues(o) , slabValues[d].xSliceValues(o-1) , mesh , polygonMesh , addBarycenter , vertexOffset , threads );
-
-			if( o&1 ) break;
-		}
-
-		for( d=_maxDepth , o=slice+1 ; d>=0 ; d-- , o>>=1 )
-		{
-			// Initialize for the next pass
-			if( o<(1<<(d+1)) )
-			{
-				_sNodes.setSliceTableData( slabValues[d].sliceValues(o+1).sliceData , _localToGlobal( d ) , o+1 + _localInset( d ) , threads );
-				_sNodes.setXSliceTableData( slabValues[d].xSliceValues(o).xSliceData , _localToGlobal( d ) , o + _localInset( d ) , threads );
-				slabValues[d].sliceValues(o+1).reset( nonLinearFit );
-				slabValues[d].xSliceValues(o).reset();
-			}
-			if( o&1 ) break;
-		}
-	}
-	memoryUsage();
-	if( colorBSData ) delete colorBSData;
-}
-
-
-template< class Real >
-template< class Vertex , int FEMDegree , BoundaryType BType >
-void Octree< Real >::_setSliceIsoCorners( const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , Real isoValue , LocalDepth depth , int slice , std::vector< _SlabValues< Vertex > >& slabValues , const _Evaluator< FEMDegree , BType >& evaluator , int threads )
-{
-	if( slice>0          ) _setSliceIsoCorners( solution , coarseSolution , isoValue , depth , slice , 1 , slabValues , evaluator , threads );
-	if( slice<(1<<depth) ) _setSliceIsoCorners( solution , coarseSolution , isoValue , depth , slice , 0 , slabValues , evaluator , threads );
-}
-template< class Real >
-template< class Vertex , int FEMDegree , BoundaryType BType >
-void Octree< Real >::_setSliceIsoCorners( const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , Real isoValue , LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& slabValues , const struct _Evaluator< FEMDegree , BType >& evaluator , int threads )
-{
-	typename Octree::template _SliceValues< Vertex >& sValues = slabValues[depth].sliceValues( slice );
-	std::vector< ConstPointSupportKey< FEMDegree > > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slice-z) ; i<_sNodesEnd(depth,slice-z) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		Real squareValues[ Square::CORNERS ];
-		ConstPointSupportKey< FEMDegree >& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		if( !IsActiveNode( leaf->children ) )
-		{
-			const typename SortedTreeNodes::SquareCornerIndices& cIndices = sValues.sliceData.cornerIndices( leaf );
-
-			bool isInterior = _isInteriorlySupported< FEMDegree >( leaf->parent );
-			neighborKey.getNeighbors( leaf );
-
-			for( int x=0 ; x<2 ; x++ ) for( int y=0 ; y<2 ; y++ )
-			{
-				int cc = Cube::CornerIndex( x , y , z );
-				int fc = Square::CornerIndex( x , y );
-				int vIndex = cIndices[fc];
-				if( !sValues.cornerSet[vIndex] )
-				{
-					if( sValues.cornerGradients )
-					{
-						std::pair< Real , Point3D< Real > > p = _getCornerValueAndGradient( neighborKey , leaf , cc , solution , coarseSolution , evaluator , isInterior );
-						sValues.cornerValues[vIndex] = p.first , sValues.cornerGradients[vIndex] = p.second;
-					}
-					else sValues.cornerValues[vIndex] = _getCornerValue( neighborKey , leaf , cc , solution , coarseSolution , evaluator , isInterior );
-					sValues.cornerSet[vIndex] = 1;
-				}
-				squareValues[fc] = sValues.cornerValues[ vIndex ];
-				TreeOctNode* node = leaf;
-				LocalDepth _depth = depth;
-				int _slice = slice;
-				while( _isValidSpaceNode( node->parent ) && (node-node->parent->children)==cc )
-				{
-					node = node->parent , _depth-- , _slice >>= 1;
-					typename Octree::template _SliceValues< Vertex >& _sValues = slabValues[_depth].sliceValues( _slice );
-					const typename SortedTreeNodes::SquareCornerIndices& _cIndices = _sValues.sliceData.cornerIndices( node );
-					int _vIndex = _cIndices[fc];
-					_sValues.cornerValues[_vIndex] = sValues.cornerValues[vIndex];
-					if( _sValues.cornerGradients ) _sValues.cornerGradients[_vIndex] = sValues.cornerGradients[vIndex];
-					_sValues.cornerSet[_vIndex] = 1;
-				}
-			}
-			sValues.mcIndices[ i - sValues.sliceData.nodeOffset ] = MarchingSquares::GetIndex( squareValues , isoValue );
-		}
-	}
-}
-
-template< class Real >
-template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-void Octree< Real >::_setSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slice , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	if( slice>0          ) _setSliceIsoVertices< WeightDegree , ColorDegree >( colorBSData , densityWeights , colorData , isoValue , depth , slice , 1 , vOffset , mesh , slabValues , threads );
-	if( slice<(1<<depth) ) _setSliceIsoVertices< WeightDegree , ColorDegree >( colorBSData , densityWeights , colorData , isoValue , depth , slice , 0 , vOffset , mesh , slabValues , threads );
-}
-template< class Real >
-template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-void Octree< Real >::_setSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slice , int z , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	typename Octree::template _SliceValues< Vertex >& sValues = slabValues[depth].sliceValues( slice );
-	// [WARNING] In the case Degree=2, these two keys are the same, so we don't have to maintain them separately.
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	std::vector< ConstPointSupportKey< WeightDegree > > weightKeys( std::max< int >( 1 , threads ) );
-	std::vector< ConstPointSupportKey< ColorDegree > > colorKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) ) , weightKeys[i].set( _localToGlobal( depth ) ) , colorKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slice-z) ; i<_sNodesEnd(depth,slice-z) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		ConstAdjacenctNodeKey& neighborKey =  neighborKeys[ omp_get_thread_num() ];
-		ConstPointSupportKey< WeightDegree >& weightKey = weightKeys[ omp_get_thread_num() ];
-		ConstPointSupportKey< ColorDegree >& colorKey = colorKeys[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		if( !IsActiveNode( leaf->children ) )
-		{
-			int idx = i - sValues.sliceData.nodeOffset;
-			const typename SortedTreeNodes::SquareEdgeIndices& eIndices = sValues.sliceData.edgeIndices( leaf );
-			if( MarchingSquares::HasRoots( sValues.mcIndices[idx] ) )
-			{
-				neighborKey.getNeighbors( leaf );
-				if( densityWeights ) weightKey.getNeighbors( leaf );
-				if( colorData ) colorKey.getNeighbors( leaf );
-				for( int e=0 ; e<Square::EDGES ; e++ )
-					if( MarchingSquares::HasEdgeRoots( sValues.mcIndices[idx] , e ) )
-					{
-						int vIndex = eIndices[e];
-						if( !sValues.edgeSet[vIndex] )
-						{
-							Vertex vertex;
-							int o , y;
-							Square::FactorEdgeIndex( e , o , y );
-							long long key = VertexData::EdgeIndex( leaf , Cube::EdgeIndex( o , y , z ) , _localToGlobal(_maxDepth) );
-							_getIsoVertex( colorBSData , densityWeights , colorData , isoValue , weightKey , colorKey , leaf , e , z , sValues , vertex );
-							bool stillOwner = false;
-							std::pair< int , Vertex > hashed_vertex;
-#pragma omp critical (add_point_access)
-							{
-								if( !sValues.edgeSet[vIndex] )
-								{
-									mesh.addOutOfCorePoint( vertex );
-									sValues.edgeSet[ vIndex ] = 1;
-									sValues.edgeKeys[ vIndex ] = key;
-									sValues.edgeVertexMap[key] = hashed_vertex = std::pair< int , Vertex >( vOffset , vertex );
-									vOffset++;
-									stillOwner = true;
-								}
-							}
-							if( stillOwner )
-							{
-								// We only need to pass the iso-vertex down if the edge it lies on is adjacent to a coarser leaf
-								bool isNeeded;
-								switch( o )
-								{
-								case 0: isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][2*y][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][2*y][2*z] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][1][2*z] ) ) ; break;
-								case 1: isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[2*y][1][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[2*y][1][2*z] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][1][2*z] ) ) ; break;
-								}
-								if( isNeeded )
-								{
-									int f[2];
-									Cube::FacesAdjacentToEdge( Cube::EdgeIndex( o , y , z ) , f[0] , f[1] );
-									for( int k=0 ; k<2 ; k++ )
-									{
-										TreeOctNode* node = leaf;
-										LocalDepth _depth = depth;
-										int _slice = slice;
-										bool _isNeeded = isNeeded;
-										while( _isNeeded && _isValidSpaceNode( node->parent ) && Cube::IsFaceCorner( (int)(node-node->parent->children) , f[k] ) )
-										{
-											node = node->parent , _depth-- , _slice >>= 1;
-											typename Octree::template _SliceValues< Vertex >& _sValues = slabValues[_depth].sliceValues( _slice );
-#pragma omp critical (add_coarser_point_access)
-											_sValues.edgeVertexMap[key] = hashed_vertex;
-											switch( o )
-											{
-												case 0: _isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][2*y][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][2*y][2*z] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][1][2*z] ) ) ; break;
-												case 1: _isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[2*y][1][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[2*y][1][2*z] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][1][2*z] ) ) ; break;
-											}
-										}
-									}
-								}
-							}
-						}
-					}
-			}
-		}
-	}
-}
-template< class Real >
-template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-void Octree< Real >::_setXSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slab , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	typename Octree::template  _SliceValues< Vertex >& bValues = slabValues[depth].sliceValues ( slab   );
-	typename Octree::template  _SliceValues< Vertex >& fValues = slabValues[depth].sliceValues ( slab+1 );
-	typename Octree::template _XSliceValues< Vertex >& xValues = slabValues[depth].xSliceValues( slab   );
-
-	// [WARNING] In the case Degree=2, these two keys are the same, so we don't have to maintain them separately.
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	std::vector< ConstPointSupportKey< WeightDegree > > weightKeys( std::max< int >( 1 , threads ) );
-	std::vector< ConstPointSupportKey< ColorDegree > > colorKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) ) , weightKeys[i].set( _localToGlobal( depth ) ) , colorKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slab) ; i<_sNodesEnd(depth,slab) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		ConstAdjacenctNodeKey& neighborKey =  neighborKeys[ omp_get_thread_num() ];
-		ConstPointSupportKey< WeightDegree >& weightKey = weightKeys[ omp_get_thread_num() ];
-		ConstPointSupportKey< ColorDegree >& colorKey = colorKeys[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		if( !IsActiveNode( leaf->children ) )
-		{
-			unsigned char mcIndex = ( bValues.mcIndices[ i - bValues.sliceData.nodeOffset ] ) | ( fValues.mcIndices[ i - fValues.sliceData.nodeOffset ] )<<4;
-			const typename SortedTreeNodes::SquareCornerIndices& eIndices = xValues.xSliceData.edgeIndices( leaf );
-			if( MarchingCubes::HasRoots( mcIndex ) )
-			{
-				neighborKey.getNeighbors( leaf );
-				if( densityWeights ) weightKey.getNeighbors( leaf );
-				if( colorData ) colorKey.getNeighbors( leaf );
-				for( int x=0 ; x<2 ; x++ ) for( int y=0 ; y<2 ; y++ )
-				{
-					int c = Square::CornerIndex( x , y );
-					int e = Cube::EdgeIndex( 2 , x , y );
-					if( MarchingCubes::HasEdgeRoots( mcIndex , e ) )
-					{
-						int vIndex = eIndices[c];
-						if( !xValues.edgeSet[vIndex] )
-						{
-							Vertex vertex;
-							long long key = VertexData::EdgeIndex( leaf , e , _localToGlobal(_maxDepth) );
-							_getIsoVertex( colorBSData , densityWeights , colorData , isoValue , weightKey , colorKey , leaf , c , bValues , fValues , vertex );
-							bool stillOwner = false;
-							std::pair< int , Vertex > hashed_vertex;
-#pragma omp critical (add_x_point_access)
-							{
-								if( !xValues.edgeSet[vIndex] )
-								{
-									mesh.addOutOfCorePoint( vertex );
-									xValues.edgeSet[ vIndex ] = 1;
-									xValues.edgeKeys[ vIndex ] = key;
-									xValues.edgeVertexMap[key] = hashed_vertex = std::pair< int , Vertex >( vOffset , vertex );
-									stillOwner = true;
-									vOffset++;
-								}
-							}
-							if( stillOwner )
-							{
-								// We only need to pass the iso-vertex down if the edge it lies on is adjacent to a coarser leaf
-								bool isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[2*x][1][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[2*x][2*y][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][2*y][1] ) );
-								if( isNeeded )
-								{
-									int f[2];
-									Cube::FacesAdjacentToEdge( e , f[0] , f[1] );
-									for( int k=0 ; k<2 ; k++ )
-									{
-										TreeOctNode* node = leaf;
-										LocalDepth _depth = depth;
-										int _slab = slab;
-										bool _isNeeded = isNeeded;
-										while( _isNeeded && _isValidSpaceNode( node->parent ) && Cube::IsFaceCorner( (int)(node-node->parent->children) , f[k] ) )
-										{
-											node = node->parent , _depth-- , _slab >>= 1;
-											typename Octree::template _XSliceValues< Vertex >& _xValues = slabValues[_depth].xSliceValues( _slab );
-#pragma omp critical (add_x_coarser_point_access)
-											_xValues.edgeVertexMap[key] = hashed_vertex;
-											_isNeeded = ( !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[2*x][1][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[2*x][2*y][1] ) || !_isValidSpaceNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][2*y][1] ) );
-										}
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_copyFinerSliceIsoEdgeKeys( LocalDepth depth , int slice , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	if( slice>0          ) _copyFinerSliceIsoEdgeKeys( depth , slice , 1 , slabValues , threads );
-	if( slice<(1<<depth) ) _copyFinerSliceIsoEdgeKeys( depth , slice , 0 , slabValues , threads );
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_copyFinerSliceIsoEdgeKeys( LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	_SliceValues< Vertex >& pSliceValues = slabValues[depth  ].sliceValues(slice   );
-	_SliceValues< Vertex >& cSliceValues = slabValues[depth+1].sliceValues(slice<<1);
-	typename SortedTreeNodes::SliceTableData& pSliceData = pSliceValues.sliceData;
-	typename SortedTreeNodes::SliceTableData& cSliceData = cSliceValues.sliceData;
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slice-z) ; i<_sNodesEnd(depth,slice-z) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-		if( IsActiveNode( _sNodes.treeNodes[i]->children ) )
-		{
-			typename SortedTreeNodes::SquareEdgeIndices& pIndices = pSliceData.edgeIndices( i );
-			// Copy the edges that overlap the coarser edges
-			for( int orientation=0 ; orientation<2 ; orientation++ ) for( int y=0 ; y<2 ; y++ )
-			{
-				int fe = Square::EdgeIndex( orientation , y );
-				int pIndex = pIndices[fe];
-				if( !pSliceValues.edgeSet[ pIndex ] )
-				{
-					int ce = Cube::EdgeIndex( orientation , y , z );
-					int c1 , c2;
-					switch( orientation )
-					{
-					case 0: c1 = Cube::CornerIndex( 0 , y , z ) , c2 = Cube::CornerIndex( 1 , y , z ) ; break;
-					case 1: c1 = Cube::CornerIndex( y , 0 , z ) , c2 = Cube::CornerIndex( y , 1 , z ) ; break;
-					}
-					// [SANITY CHECK]
-//					if( _isValidSpaceNode( _sNodes.treeNodes[i]->children + c1 )!=_isValidSpaceNode( _sNodes.treeNodes[i]->children + c2 ) ) fprintf( stderr , "[WARNING] Finer edges should both be valid or invalid\n" ) , exit( 0 );
-					if( !_isValidSpaceNode( _sNodes.treeNodes[i]->children + c1 ) || !_isValidSpaceNode( _sNodes.treeNodes[i]->children + c2 ) ) continue;
-
-					int cIndex1 = cSliceData.edgeIndices( _sNodes.treeNodes[i]->children + c1 )[fe];
-					int cIndex2 = cSliceData.edgeIndices( _sNodes.treeNodes[i]->children + c2 )[fe];
-					if( cSliceValues.edgeSet[cIndex1] != cSliceValues.edgeSet[cIndex2] )
-					{
-						long long key;
-						if( cSliceValues.edgeSet[cIndex1] ) key = cSliceValues.edgeKeys[cIndex1];
-						else                                key = cSliceValues.edgeKeys[cIndex2];
-						std::pair< int , Vertex > vPair = cSliceValues.edgeVertexMap.find( key )->second;
-#pragma omp critical ( copy_finer_edge_keys )
-						pSliceValues.edgeVertexMap[key] = vPair;
-						pSliceValues.edgeKeys[pIndex] = key;
-						pSliceValues.edgeSet[pIndex] = 1;
-					}
-					else if( cSliceValues.edgeSet[cIndex1] && cSliceValues.edgeSet[cIndex2] )
-					{
-						long long key1 = cSliceValues.edgeKeys[cIndex1] , key2 = cSliceValues.edgeKeys[cIndex2];
-#pragma omp critical ( set_edge_pairs )
-						pSliceValues.vertexPairMap[ key1 ] = key2 ,	pSliceValues.vertexPairMap[ key2 ] = key1;
-
-						const TreeOctNode* node = _sNodes.treeNodes[i];
-						LocalDepth _depth = depth;
-						int _slice = slice;
-						while( _isValidSpaceNode( node->parent ) && Cube::IsEdgeCorner( (int)( node - node->parent->children ) , ce ) )
-						{
-							node = node->parent , _depth-- , _slice >>= 1;
-							_SliceValues< Vertex >& _pSliceValues = slabValues[_depth].sliceValues(_slice);
-#pragma omp critical ( set_edge_pairs )
-							_pSliceValues.vertexPairMap[ key1 ] = key2 , _pSliceValues.vertexPairMap[ key2 ] = key1;
-						}
-					}
-				}
-			}
-		}
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_copyFinerXSliceIsoEdgeKeys( LocalDepth depth , int slab , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	_XSliceValues< Vertex >& pSliceValues  = slabValues[depth  ].xSliceValues(slab);
-	_XSliceValues< Vertex >& cSliceValues0 = slabValues[depth+1].xSliceValues( (slab<<1)|0 );
-	_XSliceValues< Vertex >& cSliceValues1 = slabValues[depth+1].xSliceValues( (slab<<1)|1 );
-	typename SortedTreeNodes::XSliceTableData& pSliceData  = pSliceValues.xSliceData;
-	typename SortedTreeNodes::XSliceTableData& cSliceData0 = cSliceValues0.xSliceData;
-	typename SortedTreeNodes::XSliceTableData& cSliceData1 = cSliceValues1.xSliceData;
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slab) ; i<_sNodesEnd(depth,slab) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-		if( IsActiveNode( _sNodes.treeNodes[i]->children ) )
-		{
-			typename SortedTreeNodes::SquareCornerIndices& pIndices = pSliceData.edgeIndices( i );
-			for( int x=0 ; x<2 ; x++ ) for( int y=0 ; y<2 ; y++ )
-			{
-				int fc = Square::CornerIndex( x , y );
-				int pIndex = pIndices[fc];
-				if( !pSliceValues.edgeSet[pIndex] )
-				{
-					int c0 = Cube::CornerIndex( x , y , 0 ) , c1 = Cube::CornerIndex( x , y , 1 );
-
-					// [SANITY CHECK]
-//					if( _isValidSpaceNode( _sNodes.treeNodes[i]->children + c0 )!=_isValidSpaceNode( _sNodes.treeNodes[i]->children + c1 ) ) fprintf( stderr , "[ERROR] Finer edges should both be valid or invalid\n" ) , exit( 0 );
-					if( !_isValidSpaceNode( _sNodes.treeNodes[i]->children + c0 ) || !_isValidSpaceNode( _sNodes.treeNodes[i]->children + c1 ) ) continue;
-
-					int cIndex0 = cSliceData0.edgeIndices( _sNodes.treeNodes[i]->children + c0 )[fc];
-					int cIndex1 = cSliceData1.edgeIndices( _sNodes.treeNodes[i]->children + c1 )[fc];
-					if( cSliceValues0.edgeSet[cIndex0] != cSliceValues1.edgeSet[cIndex1] )
-					{
-						long long key;
-						std::pair< int , Vertex > vPair;
-						if( cSliceValues0.edgeSet[cIndex0] ) key = cSliceValues0.edgeKeys[cIndex0] , vPair = cSliceValues0.edgeVertexMap.find( key )->second;
-						else                                 key = cSliceValues1.edgeKeys[cIndex1] , vPair = cSliceValues1.edgeVertexMap.find( key )->second;
-#pragma omp critical ( copy_finer_x_edge_keys )
-						pSliceValues.edgeVertexMap[key] = vPair;
-						pSliceValues.edgeKeys[ pIndex ] = key;
-						pSliceValues.edgeSet[ pIndex ] = 1;
-					}
-					else if( cSliceValues0.edgeSet[cIndex0] && cSliceValues1.edgeSet[cIndex1] )
-					{
-						long long key0 = cSliceValues0.edgeKeys[cIndex0] , key1 = cSliceValues1.edgeKeys[cIndex1];
-#pragma omp critical ( set_x_edge_pairs )
-						pSliceValues.vertexPairMap[ key0 ] = key1 , pSliceValues.vertexPairMap[ key1 ] = key0;
-						const TreeOctNode* node = _sNodes.treeNodes[i];
-						LocalDepth _depth = depth;
-						int _slab = slab , ce = Cube::CornerIndex( 2 , x , y );
-						while( _isValidSpaceNode( node->parent ) && Cube::IsEdgeCorner( (int)( node - node->parent->children ) , ce ) )
-						{
-							node = node->parent , _depth-- , _slab>>= 1;
-							_SliceValues< Vertex >& _pSliceValues = slabValues[_depth].sliceValues(_slab);
-#pragma omp critical ( set_x_edge_pairs )
-							_pSliceValues.vertexPairMap[ key0 ] = key1 , _pSliceValues.vertexPairMap[ key1 ] = key0;
-						}
-					}
-				}
-			}
-		}
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_setSliceIsoEdges( LocalDepth depth , int slice , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	if( slice>0          ) _setSliceIsoEdges( depth , slice , 1 , slabValues , threads );
-	if( slice<(1<<depth) ) _setSliceIsoEdges( depth , slice , 0 , slabValues , threads );
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_setSliceIsoEdges( LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	typename Octree::template _SliceValues< Vertex >& sValues = slabValues[depth].sliceValues( slice );
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth, slice-z) ; i<_sNodesEnd(depth,slice-z) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		int isoEdges[ 2 * MarchingSquares::MAX_EDGES ];
-		ConstAdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		if( !IsActiveNode( leaf->children ) )
-		{
-			int idx = i - sValues.sliceData.nodeOffset;
-			const typename SortedTreeNodes::SquareEdgeIndices& eIndices = sValues.sliceData.edgeIndices( leaf );
-			const typename SortedTreeNodes::SquareFaceIndices& fIndices = sValues.sliceData.faceIndices( leaf );
-			unsigned char mcIndex = sValues.mcIndices[idx];
-			if( !sValues.faceSet[ fIndices[0] ] )
-			{
-				neighborKey.getNeighbors( leaf );
-				if( !IsActiveNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][1][2*z] ) || !IsActiveNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[1][1][2*z]->children ) )
-				{
-					_FaceEdges fe;
-					fe.count = MarchingSquares::AddEdgeIndices( mcIndex , isoEdges );
-					for( int j=0 ; j<fe.count ; j++ ) for( int k=0 ; k<2 ; k++ )
-					{
-						if( !sValues.edgeSet[ eIndices[ isoEdges[2*j+k] ] ] ) fprintf( stderr , "[ERROR] Edge not set 1: %d / %d\n" , slice , 1<<depth ) , exit( 0 );
-						fe.edges[j][k] = sValues.edgeKeys[ eIndices[ isoEdges[2*j+k] ] ];
-					}
-					sValues.faceSet[ fIndices[0] ] = 1;
-					sValues.faceEdges[ fIndices[0] ] = fe;
-
-					TreeOctNode* node = leaf;
-					LocalDepth _depth = depth;
-					int _slice = slice , f = Cube::FaceIndex( 2 , z );
-					std::vector< _IsoEdge > edges;
-					edges.resize( fe.count );
-					for( int j=0 ; j<fe.count ; j++ ) edges[j] = fe.edges[j];
-					while( _isValidSpaceNode( node->parent ) && Cube::IsFaceCorner( (int)(node-node->parent->children) , f ) )
-					{
-						node = node->parent , _depth-- , _slice >>= 1;
-						if( IsActiveNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][1][2*z] ) && IsActiveNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[1][1][2*z]->children ) ) break;
-						long long key = VertexData::FaceIndex( node , f , _localToGlobal(_maxDepth) );
-#pragma omp critical( add_iso_edge_access )
-						{
-							typename Octree::template _SliceValues< Vertex >& _sValues = slabValues[_depth].sliceValues( _slice );
-							typename std::unordered_map< long long, std::vector< _IsoEdge > >::iterator iter = _sValues.faceEdgeMap.find(key);
-							if( iter==_sValues.faceEdgeMap.end() ) _sValues.faceEdgeMap[key] = edges;
-							else for( int j=0 ; j<fe.count ; j++ ) iter->second.push_back( fe.edges[j] );
-						}
-					}
-				}
-			}
-		}
-	}
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_setXSliceIsoEdges( LocalDepth depth , int slab , std::vector< _SlabValues< Vertex > >& slabValues , int threads )
-{
-	typename Octree::template  _SliceValues< Vertex >& bValues = slabValues[depth].sliceValues ( slab   );
-	typename Octree::template  _SliceValues< Vertex >& fValues = slabValues[depth].sliceValues ( slab+1 );
-	typename Octree::template _XSliceValues< Vertex >& xValues = slabValues[depth].xSliceValues( slab   );
-
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,slab) ; i<_sNodesEnd(depth,slab) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		int isoEdges[ 2 * MarchingSquares::MAX_EDGES ];
-		ConstAdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		if( !IsActiveNode( leaf->children ) )
-		{
-			const typename SortedTreeNodes::SquareCornerIndices& cIndices = xValues.xSliceData.edgeIndices( leaf );
-			const typename SortedTreeNodes::SquareEdgeIndices& eIndices = xValues.xSliceData.faceIndices( leaf );
-			unsigned char mcIndex = ( bValues.mcIndices[ i - bValues.sliceData.nodeOffset ] ) | ( fValues.mcIndices[ i - fValues.sliceData.nodeOffset ]<<4 );
-			{
-				neighborKey.getNeighbors( leaf );
-				for( int o=0 ; o<2 ; o++ ) for( int x=0 ; x<2 ; x++ )
-				{
-					int e = Square::EdgeIndex( o , x );
-					int f = Cube::FaceIndex( 1-o , x );
-					unsigned char _mcIndex = MarchingCubes::GetFaceIndex( mcIndex , f );
-					int xx = o==1 ? 2*x : 1 , yy = o==0 ? 2*x : 1 , zz = 1;
-					if(	!xValues.faceSet[ eIndices[e] ] && ( !IsActiveNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[xx][yy][zz] ) || !IsActiveNode( neighborKey.neighbors[ _localToGlobal( depth ) ].neighbors[xx][yy][zz]->children ) ) )
-					{
-						_FaceEdges fe;
-						fe.count = MarchingSquares::AddEdgeIndices( _mcIndex , isoEdges );
-						for( int j=0 ; j<fe.count ; j++ ) for( int k=0 ; k<2 ; k++ )
-						{
-							int _o , _x;
-							Square::FactorEdgeIndex( isoEdges[2*j+k] , _o , _x );
-							if( _o==1 ) // Cross-edge
-							{
-								int idx = o==0 ? cIndices[ Square::CornerIndex(_x,x) ] : cIndices[ Square::CornerIndex(x,_x) ];
-								if( !xValues.edgeSet[ idx ] ) fprintf( stderr , "[ERROR] Edge not set 3: %d / %d\n" , slab , 1<<depth ) , exit( 0 );
-								fe.edges[j][k] = xValues.edgeKeys[ idx ];
-							}
-							else
-							{
-								const typename Octree::template _SliceValues< Vertex >& sValues = (_x==0) ? bValues : fValues;
-								int idx = sValues.sliceData.edgeIndices(i)[ Square::EdgeIndex(o,x) ];
-								if( !sValues.edgeSet[ idx ] ) fprintf( stderr , "[ERROR] Edge not set 5: %d / %d\n" , slab , 1<<depth ) , exit( 0 );
-								fe.edges[j][k] = sValues.edgeKeys[ idx ];
-							}
-						}
-						xValues.faceSet[ eIndices[e] ] = 1;
-						xValues.faceEdges[ eIndices[e] ] = fe;
-
-						TreeOctNode* node = leaf;
-						LocalDepth _depth = depth;
-						int _slab = slab;
-						std::vector< _IsoEdge > edges;
-						edges.resize( fe.count );
-						for( int j=0 ; j<fe.count ; j++ ) edges[j] = fe.edges[j];
-						while( _isValidSpaceNode( node->parent ) && Cube::IsFaceCorner( (int)(node-node->parent->children) , f ) )
-						{
-							node = node->parent , _depth-- , _slab >>= 1;
-							if( IsActiveNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[xx][yy][zz] ) && IsActiveNode( neighborKey.neighbors[ _localToGlobal( _depth ) ].neighbors[xx][yy][zz]->children ) ) break;
-							long long key = VertexData::FaceIndex( node , f , _localToGlobal(_maxDepth) );
-#pragma omp critical( add_x_iso_edge_access )
-							{
-								typename Octree::template _XSliceValues< Vertex >& _xValues = slabValues[_depth].xSliceValues( _slab );
-								typename std::unordered_map< long long, std::vector< _IsoEdge > >::iterator iter = _xValues.faceEdgeMap.find(key);
-								if( iter==_xValues.faceEdgeMap.end() ) _xValues.faceEdgeMap[key] = edges;
-								else for( int j=0 ; j<fe.count ; j++ ) iter->second.push_back( fe.edges[j] );
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-}
-template< class Real >
-template< class Vertex >
-void Octree< Real >::_setIsoSurface( LocalDepth depth , int offset , const _SliceValues< Vertex >& bValues , const _SliceValues< Vertex >& fValues , const _XSliceValues< Vertex >& xValues , CoredMeshData< Vertex >& mesh , bool polygonMesh , bool addBarycenter , int& vOffset , int threads )
-{
-	std::vector< std::pair< int , Vertex > > polygon;
-	std::vector< std::vector< _IsoEdge > > edgess( std::max< int >( 1 , threads ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth,offset) ; i<_sNodesEnd(depth,offset) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		std::vector< _IsoEdge >& edges = edgess[ omp_get_thread_num() ];
-		TreeOctNode* leaf = _sNodes.treeNodes[i];
-		int res = 1<<depth;
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( leaf , d , off );
-		bool inBounds = off[0]>=0 && off[0]<res && off[1]>=0 && off[1]<res && off[2]>=0 && off[2]<res;
-		if( inBounds && !IsActiveNode( leaf->children ) )
-		{
-			edges.clear();
-			unsigned char mcIndex = ( bValues.mcIndices[ i - bValues.sliceData.nodeOffset ] ) | ( fValues.mcIndices[ i - fValues.sliceData.nodeOffset ]<<4 );
-			// [WARNING] Just because the node looks empty doesn't mean it doesn't get eges from finer neighbors
-			{
-				// Gather the edges from the faces (with the correct orientation)
-				for( int f=0 ; f<Cube::FACES ; f++ )
-				{
-					int d , o;
-					Cube::FactorFaceIndex( f , d , o );
-					int flip = d==1 ? 1 : 0; // To account for the fact that the section in y flips the orientation
-					if( o ) flip = 1-flip;
-					flip = 1-flip; // To get the right orientation
-					if( d==2 )
-					{
-						const _SliceValues< Vertex >& sValues = (o==0) ? bValues : fValues;
-						int fIdx = sValues.sliceData.faceIndices(i)[0];
-						if( sValues.faceSet[fIdx] )
-						{
-							const _FaceEdges& fe = sValues.faceEdges[ fIdx ];
-							for( int j=0 ; j<fe.count ; j++ ) edges.push_back( _IsoEdge( fe.edges[j][flip] , fe.edges[j][1-flip] ) );
-						}
-						else
-						{
-							long long key = VertexData::FaceIndex( leaf , f , _localToGlobal(_maxDepth) );
-							typename std::unordered_map< long long, std::vector< _IsoEdge > >::const_iterator iter = sValues.faceEdgeMap.find(key);
-							if( iter!=sValues.faceEdgeMap.end() )
-							{
-								const std::vector< _IsoEdge >& _edges = iter->second;
-								for( size_t j=0 ; j<_edges.size() ; j++ ) edges.push_back( _IsoEdge( _edges[j][flip] , _edges[j][1-flip] ) );
-							}
-							else fprintf( stderr , "[ERROR] Invalid faces: %d  %d %d\n" , i , d , o ) , exit( 0 );
-						}
-					}
-					else
-					{
-						int fIdx = xValues.xSliceData.faceIndices(i)[ Square::EdgeIndex( 1-d , o ) ];
-						if( xValues.faceSet[fIdx] )
-						{
-							const _FaceEdges& fe = xValues.faceEdges[ fIdx ];
-							for( int j=0 ; j<fe.count ; j++ ) edges.push_back( _IsoEdge( fe.edges[j][flip] , fe.edges[j][1-flip] ) );
-						}
-						else
-						{
-							long long key = VertexData::FaceIndex( leaf , f , _localToGlobal(_maxDepth) );
-							typename std::unordered_map< long long , std::vector< _IsoEdge > >::const_iterator iter = xValues.faceEdgeMap.find(key);
-							if( iter!=xValues.faceEdgeMap.end() )
-							{
-								const std::vector< _IsoEdge >& _edges = iter->second;
-								for( size_t j=0 ; j<_edges.size() ; j++ ) edges.push_back( _IsoEdge( _edges[j][flip] , _edges[j][1-flip] ) );
-							}
-							else fprintf( stderr , "[ERROR] Invalid faces: %d  %d %d\n" , i , d , o ) , exit( 0 );
-						}
-					}
-				}
-				// Get the edge loops
-				std::vector< std::vector< long long  > > loops;
-				while( edges.size() )
-				{
-					loops.resize( loops.size()+1 );
-					_IsoEdge edge = edges.back();
-					edges.pop_back();
-					long long start = edge[0] , current = edge[1];
-					while( current!=start )
-					{
-						int idx;
-						for( idx=0 ; idx<(int)edges.size() ; idx++ ) if( edges[idx][0]==current ) break;
-						if( idx==edges.size() )
-						{
-							typename std::unordered_map< long long, long long >::const_iterator iter;
-							if     ( (iter=bValues.vertexPairMap.find(current))!=bValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
-							else if( (iter=fValues.vertexPairMap.find(current))!=fValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
-							else if( (iter=xValues.vertexPairMap.find(current))!=xValues.vertexPairMap.end() ) loops.back().push_back( current ) , current = iter->second;
-							else
-							{
-								LocalDepth d ; LocalOffset off;
-								_localDepthAndOffset( leaf , d , off );
-								fprintf( stderr , "[ERROR] Failed to close loop [%d: %d %d %d] | (%d): %lld\n" , d-1 , off[0] , off[1] , off[2] , i , current );
-								exit( 0 );
-							}
-						}
-						else
-						{
-							loops.back().push_back( current );
-							current = edges[idx][1];
-							edges[idx] = edges.back() , edges.pop_back();
-						}
-					}
-					loops.back().push_back( start );
-				}
-				// Add the loops to the mesh
-				for( size_t j=0 ; j<loops.size() ; j++ )
-				{
-					std::vector< std::pair< int , Vertex > > polygon( loops[j].size() );
-					for( size_t k=0 ; k<loops[j].size() ; k++ )
-					{
-						long long key = loops[j][k];
-						typename std::unordered_map< long long, std::pair< int, Vertex > >::const_iterator iter;
-						if     ( ( iter=bValues.edgeVertexMap.find( key ) )!=bValues.edgeVertexMap.end() ) polygon[k] = iter->second;
-						else if( ( iter=fValues.edgeVertexMap.find( key ) )!=fValues.edgeVertexMap.end() ) polygon[k] = iter->second;
-						else if( ( iter=xValues.edgeVertexMap.find( key ) )!=xValues.edgeVertexMap.end() ) polygon[k] = iter->second;
-						else fprintf( stderr , "[ERROR] Couldn't find vertex in edge map\n" ) , exit( 0 );
-					}
-					_addIsoPolygons( mesh , polygon , polygonMesh , addBarycenter , vOffset );
-				}
-			}
-		}
-	}
-}
-template< class Real > void SetColor( Point3D< Real >& color , unsigned char c[3] ){ for( int i=0 ; i<3 ; i++ ) c[i] = (unsigned char)std::max< int >( 0 , std::min< int >( 255 , (int)( color[i]+0.5 ) ) ); }
-
-template< class Real > void SetIsoVertex(              PlyVertex< float  >& vertex , Point3D< Real > color , Real value ){ ; }
-template< class Real > void SetIsoVertex(         PlyColorVertex< float  >& vertex , Point3D< Real > color , Real value ){ SetColor( color , vertex.color ); }
-template< class Real > void SetIsoVertex(         PlyValueVertex< float  >& vertex , Point3D< Real > color , Real value ){                                    vertex.value = float(value); }
-template< class Real > void SetIsoVertex( PlyColorAndValueVertex< float  >& vertex , Point3D< Real > color , Real value ){ SetColor( color , vertex.color ) , vertex.value = float(value); }
-template< class Real > void SetIsoVertex(              PlyVertex< double >& vertex , Point3D< Real > color , Real value ){ ; }
-template< class Real > void SetIsoVertex(         PlyColorVertex< double >& vertex , Point3D< Real > color , Real value ){ SetColor( color , vertex.color ); }
-template< class Real > void SetIsoVertex(         PlyValueVertex< double >& vertex , Point3D< Real > color , Real value ){                                    vertex.value = double(value); }
-template< class Real > void SetIsoVertex( PlyColorAndValueVertex< double >& vertex , Point3D< Real > color , Real value ){ SetColor( color , vertex.color ) , vertex.value = double(value); }
-
-template< class Real >
-template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-bool Octree< Real >::_getIsoVertex( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , ConstPointSupportKey< WeightDegree >& weightKey , ConstPointSupportKey< ColorDegree >& colorKey , const TreeOctNode* node , int edgeIndex , int z , const _SliceValues< Vertex >& sValues , Vertex& vertex )
-{
-	Point3D< Real > position;
-	int c0 , c1;
-	Square::EdgeCorners( edgeIndex , c0 , c1 );
-
-	bool nonLinearFit = sValues.cornerGradients!=NullPointer( Point3D< Real > );
-	const typename SortedTreeNodes::SquareCornerIndices& idx = sValues.sliceData.cornerIndices( node );
-	Real x0 = sValues.cornerValues[idx[c0]] , x1 = sValues.cornerValues[idx[c1]];
-	Point3D< Real > s;
-	Real start , width;
-	_startAndWidth( node , s , width );
-	int o , y;
-	Square::FactorEdgeIndex( edgeIndex , o , y );
-	start = s[o];
-	switch( o )
-	{
-	case 0:
-		position[1] = s[1] + width*y;
-		position[2] = s[2] + width*z;
-		break;
-	case 1:
-		position[0] = s[0] + width*y;
-		position[2] = s[2] + width*z;
-		break;
-	}
-
-	double averageRoot;
-	bool rootFound = false;
-	if( nonLinearFit )
-	{
-		double dx0 = sValues.cornerGradients[idx[c0]][o] * width , dx1 = sValues.cornerGradients[idx[c1]][o] * width;
-	
-		// The scaling will turn the Hermite Spline into a quadratic
-		double scl = (x1-x0) / ( (dx1+dx0 ) / 2 );
-		dx0 *= scl , dx1 *= scl;
-
-		// Hermite Spline
-		Polynomial< 2 > P;
-		P.coefficients[0] = x0;
-		P.coefficients[1] = dx0;
-		P.coefficients[2] = 3*(x1-x0)-dx1-2*dx0;
-	
-		double roots[2];
-		int rCount = 0 , rootCount = P.getSolutions( isoValue , roots , 0 );
-		averageRoot = 0;
-		for( int i=0 ; i<rootCount ; i++ ) if( roots[i]>=0 && roots[i]<=1 ) averageRoot += roots[i] , rCount++;
-		if( rCount ) rootFound = true;
-		averageRoot /= rCount;
-	}
-	if( !rootFound )
-	{
-		// We have a linear function L, with L(0) = x0 and L(1) = x1
-		// => L(t) = x0 + t * (x1-x0)
-		// => L(t) = isoValue <=> t = ( isoValue - x0 ) / ( x1 - x0 )
-		if( x0==x1 ) fprintf( stderr , "[ERROR] Not a zero-crossing root: %g %g\n" , x0 , x1 ) , exit( 0 );
-		averageRoot = ( isoValue - x0 ) / ( x1 - x0 );
-	}
-	if( averageRoot<0 || averageRoot>1 )
-	{
-		fprintf( stderr , "[WARNING] Bad average root: %f\n" , averageRoot );
-		fprintf( stderr , "\t(%f %f) (%f)\n" , x0 , x1 , isoValue );
-		if( averageRoot<0 ) averageRoot = 0;
-		if( averageRoot>1 ) averageRoot = 1;
-	}
-	position[o] = Real( start + width*averageRoot );
-	vertex.point = position;
-	Point3D< Real > color;
-	Real depth(0);
-	if( densityWeights )
-	{
-		Real weight;
-		_getSampleDepthAndWeight( *densityWeights , node , position , weightKey , depth , weight );
-	}
-	if( colorData ) color = Point3D< Real >( _evaluate< ProjectiveData< Point3D< Real > , Real > >( *colorData , position , *colorBSData , colorKey ) );
-	SetIsoVertex( vertex , color , depth );
-	return true;
-}
-template< class Real >
-template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-bool Octree< Real >::_getIsoVertex( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , ConstPointSupportKey< WeightDegree >& weightKey , ConstPointSupportKey< ColorDegree >& colorKey , const TreeOctNode* node , int cornerIndex , const _SliceValues< Vertex >& bValues , const _SliceValues< Vertex >& fValues , Vertex& vertex )
-{
-	Point3D< Real > position;
-
-	bool nonLinearFit = bValues.cornerGradients!=NullPointer( Point3D< Real > ) && fValues.cornerGradients!=NullPointer( Point3D< Real > );
-	const typename SortedTreeNodes::SquareCornerIndices& idx0 = bValues.sliceData.cornerIndices( node );
-	const typename SortedTreeNodes::SquareCornerIndices& idx1 = fValues.sliceData.cornerIndices( node );
-	Real x0 = bValues.cornerValues[ idx0[cornerIndex] ] , x1 = fValues.cornerValues[ idx1[cornerIndex] ];
-	Point3D< Real > s;
-	Real start , width;
-	_startAndWidth( node , s , width );
-	start = s[2];
-	int x , y;
-	Square::FactorCornerIndex( cornerIndex , x , y );
-
-
-	position[0] = s[0] + width*x;
-	position[1] = s[1] + width*y;
-
-	double averageRoot;
-
-	bool rootFound = false;
-	if( nonLinearFit )
-	{
-		double dx0 = bValues.cornerGradients[ idx0[cornerIndex] ][2] * width , dx1 = fValues.cornerGradients[ idx1[cornerIndex] ][2] * width;
-		// The scaling will turn the Hermite Spline into a quadratic
-		double scl = (x1-x0) / ( (dx1+dx0 ) / 2 );
-		dx0 *= scl , dx1 *= scl;
-
-		// Hermite Spline
-		Polynomial< 2 > P;
-		P.coefficients[0] = x0;
-		P.coefficients[1] = dx0;
-		P.coefficients[2] = 3*(x1-x0)-dx1-2*dx0;
-
-		double roots[2];
-		int rCount = 0 , rootCount = P.getSolutions( isoValue , roots , 0 );
-		averageRoot = 0;
-		for( int i=0 ; i<rootCount ; i++ ) if( roots[i]>=0 && roots[i]<=1 ) averageRoot += roots[i] , rCount++;
-		if( rCount ) rootFound = true;
-		averageRoot /= rCount;
-	}
-	if( !rootFound )
-	{
-		// We have a linear function L, with L(0) = x0 and L(1) = x1
-		// => L(t) = x0 + t * (x1-x0)
-		// => L(t) = isoValue <=> t = ( isoValue - x0 ) / ( x1 - x0 )
-		if( x0==x1 ) fprintf( stderr , "[ERROR] Not a zero-crossing root: %g %g\n" , x0 , x1 ) , exit( 0 );
-		averageRoot = ( isoValue - x0 ) / ( x1 - x0 );
-	}
-	if( averageRoot<0 || averageRoot>1 )
-	{
-		fprintf( stderr , "[WARNING] Bad average root: %f\n" , averageRoot );
-		fprintf( stderr , "\t(%f %f) (%f)\n" , x0 , x1 , isoValue );
-		if( averageRoot<0 ) averageRoot = 0;
-		if( averageRoot>1 ) averageRoot = 1;
-	}
-	position[2] = Real( start + width*averageRoot );
-	vertex.point = position;
-	Point3D< Real > color;
-	Real depth(0);
-	if( densityWeights )
-	{
-		Real weight;
-		_getSampleDepthAndWeight( *densityWeights , node , position , weightKey , depth , weight );
-	}
-	if( colorData ) color = Point3D< Real >( _evaluate< ProjectiveData< Point3D< Real > , Real > >( *colorData , position , *colorBSData , colorKey ) );
-	SetIsoVertex( vertex , color , depth );
-	return true;
-}
-
-template< class Real >
-template< class Vertex >
-int Octree< Real >::_addIsoPolygons( CoredMeshData< Vertex >& mesh , std::vector< std::pair< int , Vertex > >& polygon , bool polygonMesh , bool addBarycenter , int& vOffset )
-{
-	if( polygonMesh )
-	{
-		std::vector< int > vertices( polygon.size() );
-		for( int i=0 ; i<(int)polygon.size() ; i++ ) vertices[i] = polygon[polygon.size()-1-i].first;
-		mesh.addPolygon_s( vertices );
-		return 1;
-	}
-	if( polygon.size()>3 )
-	{
-		bool isCoplanar = false;
-		std::vector< int > triangle( 3 );
-
-		if( addBarycenter )
-			for( int i=0 ; i<(int)polygon.size() ; i++ )
-				for( int j=0 ; j<i ; j++ )
-					if( (i+1)%polygon.size()!=j && (j+1)%polygon.size()!=i )
-					{
-						Vertex v1 = polygon[i].second , v2 = polygon[j].second;
-						for( int k=0 ; k<3 ; k++ ) if( v1.point[k]==v2.point[k] ) isCoplanar = true;
-					}
-		if( isCoplanar )
-		{
-			Vertex c;
-			typename Vertex::Wrapper _c;
-			_c *= 0;
-			for( int i=0 ; i<(int)polygon.size() ; i++ ) _c += typename Vertex::Wrapper( polygon[i].second );
-			_c /= Real( polygon.size() );
-			c = Vertex( _c );
-			int cIdx;
-#pragma omp critical (add_barycenter_point_access)
-			{
-				cIdx = mesh.addOutOfCorePoint( c );
-				vOffset++;
-			}
-			for( int i=0 ; i<(int)polygon.size() ; i++ )
-			{
-				triangle[0] = polygon[ i                  ].first;
-				triangle[1] = cIdx;
-				triangle[2] = polygon[(i+1)%polygon.size()].first;
-				mesh.addPolygon_s( triangle );
-			}
-			return (int)polygon.size();
-		}
-		else
-		{
-			MinimalAreaTriangulation< Real > MAT;
-			std::vector< Point3D< Real > > vertices;
-			std::vector< TriangleIndex > triangles;
-			vertices.resize( polygon.size() );
-			// Add the points
-			for( int i=0 ; i<(int)polygon.size() ; i++ ) vertices[i] = polygon[i].second.point;
-			MAT.GetTriangulation( vertices , triangles );
-			for( int i=0 ; i<(int)triangles.size() ; i++ )
-			{
-				for( int j=0 ; j<3 ; j++ ) triangle[2-j] = polygon[ triangles[i].idx[j] ].first;
-				mesh.addPolygon_s( triangle );
-			}
-		}
-	}
-	else if( polygon.size()==3 )
-	{
-		std::vector< int > vertices( 3 );
-		for( int i=0 ; i<3 ; i++ ) vertices[2-i] = polygon[i].first;
-		mesh.addPolygon_s( vertices );
-	}
-	return (int)polygon.size()-2;
-}
diff --git a/Src/MultiGridOctreeData.SortedTreeNodes.inl b/Src/MultiGridOctreeData.SortedTreeNodes.inl
deleted file mode 100644
index 7efc9f8..0000000
--- a/Src/MultiGridOctreeData.SortedTreeNodes.inl
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-/////////////////////
-// SortedTreeNodes //
-/////////////////////
-SortedTreeNodes::SortedTreeNodes( void )
-{
-	_sliceStart = NullPointer( Pointer( int ) );
-	treeNodes = NullPointer( TreeOctNode* );
-	_levels = 0;
-}
-SortedTreeNodes::~SortedTreeNodes( void )
-{
-	if( _sliceStart ) for( int d=0 ; d<_levels ; d++ ) FreePointer( _sliceStart[d] );
-	FreePointer( _sliceStart );
-	DeletePointer( treeNodes );
-}
-void SortedTreeNodes::set( TreeOctNode& root , std::vector< int >* map )
-{
-	set( root );
-
-	if( map )
-	{
-		map->resize( _sliceStart[_levels-1][(size_t)1<<(_levels-1)] );
-		for( int i=0 ; i<_sliceStart[_levels-1][(size_t)1<<(_levels-1)] ; i++ ) (*map)[i] = treeNodes[i]->nodeData.nodeIndex;
-	}
-	for( int i=0 ; i<_sliceStart[_levels-1][(size_t)1<<(_levels-1)] ; i++ ) treeNodes[i]->nodeData.nodeIndex = i;
-}
-void SortedTreeNodes::set( TreeOctNode& root )
-{
-	_levels = root.maxDepth()+1;
-
-	if( _sliceStart ) for( int d=0 ; d<_levels ; d++ ) FreePointer( _sliceStart[d] );
-	FreePointer( _sliceStart );
-	DeletePointer( treeNodes );
-
-	_sliceStart = AllocPointer< Pointer( int ) >( _levels );
-	for( int l=0 ; l<_levels ; l++ )
-	{
-		_sliceStart[l] = AllocPointer< int >( ((size_t)1<<l)+1 );
-		memset( _sliceStart[l] , 0 , sizeof(int)*( ((size_t)1<<l)+1 ) );
-	}
-
-	// Count the number of nodes in each slice
-	for( TreeOctNode* node = root.nextNode() ; node ; node = root.nextNode( node ) ) if( !GetGhostFlag( node ) )
-	{
-		int d , off[3];
-		node->depthAndOffset( d , off );
-		_sliceStart[d][ off[2]+1 ]++;
-	}
-
-	// Get the start index for each slice
-	{
-		int levelOffset = 0;
-		for( int l=0 ; l<_levels ; l++ )
-		{
-			_sliceStart[l][0] = levelOffset;
-			for( int s=0 ; s<((size_t)1<<l); s++ ) _sliceStart[l][s+1] += _sliceStart[l][s];
-			levelOffset = _sliceStart[l][(size_t)1<<l];
-		}
-	}
-	// Allocate memory for the tree nodes
-	treeNodes = NewPointer< TreeOctNode* >( _sliceStart[_levels-1][(size_t)1<<(_levels-1)] );
-
-	// Add the tree nodes
-	for( TreeOctNode* node=root.nextNode() ; node ; node=root.nextNode( node ) ) if( !GetGhostFlag( node ) )
-	{
-		int d , off[3];
-		node->depthAndOffset( d , off );
-		treeNodes[ _sliceStart[d][ off[2] ]++ ] = node;
-	}
-
-	// Shift the slice offsets up since we incremented as we added
-	for( int l=0 ; l<_levels ; l++ )
-	{
-		for( int s=(1<<l) ; s>0 ; s-- ) _sliceStart[l][s] = _sliceStart[l][s-1];
-		_sliceStart[l][0] = l>0 ? _sliceStart[l-1][(size_t)1<<(l-1)] : 0;
-	}
-}
-SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::SliceTableData::cornerIndices( const TreeOctNode* node ) { return cTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::SliceTableData::cornerIndices( int idx ) { return cTable[ idx - nodeOffset ]; }
-const SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::SliceTableData::cornerIndices( const TreeOctNode* node ) const { return cTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-const SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::SliceTableData::cornerIndices( int idx ) const { return cTable[ idx - nodeOffset ]; }
-SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::SliceTableData::edgeIndices( const TreeOctNode* node ) { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::SliceTableData::edgeIndices( int idx ) { return eTable[ idx - nodeOffset ]; }
-const SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::SliceTableData::edgeIndices( const TreeOctNode* node ) const { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-const SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::SliceTableData::edgeIndices( int idx ) const { return eTable[ idx - nodeOffset ]; }
-SortedTreeNodes::SquareFaceIndices& SortedTreeNodes::SliceTableData::faceIndices( const TreeOctNode* node ) { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-SortedTreeNodes::SquareFaceIndices& SortedTreeNodes::SliceTableData::faceIndices( int idx ) { return fTable[ idx - nodeOffset ]; }
-const SortedTreeNodes::SquareFaceIndices& SortedTreeNodes::SliceTableData::faceIndices( const TreeOctNode* node ) const { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-const SortedTreeNodes::SquareFaceIndices& SortedTreeNodes::SliceTableData::faceIndices( int idx ) const { return fTable[ idx - nodeOffset ]; }
-SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::XSliceTableData::edgeIndices( const TreeOctNode* node ) { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::XSliceTableData::edgeIndices( int idx ) { return eTable[ idx - nodeOffset ]; }
-const SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::XSliceTableData::edgeIndices( const TreeOctNode* node ) const { return eTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-const SortedTreeNodes::SquareCornerIndices& SortedTreeNodes::XSliceTableData::edgeIndices( int idx ) const { return eTable[ idx - nodeOffset ]; }
-SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::XSliceTableData::faceIndices( const TreeOctNode* node ) { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::XSliceTableData::faceIndices( int idx ) { return fTable[ idx - nodeOffset ]; }
-const SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::XSliceTableData::faceIndices( const TreeOctNode* node ) const { return fTable[ node->nodeData.nodeIndex - nodeOffset ]; }
-const SortedTreeNodes::SquareEdgeIndices& SortedTreeNodes::XSliceTableData::faceIndices( int idx ) const { return fTable[ idx - nodeOffset ]; }
-
-void SortedTreeNodes::setSliceTableData( SliceTableData& sData , int depth , int offset , int threads ) const
-{
-	// [NOTE] This is structure is purely for determining adjacency and is independent of the FEM degree
-	typedef OctNode< TreeNodeData >::template ConstNeighborKey< 1 , 1 > ConstAdjacenctNodeKey;
-	if( offset<0 || offset>((size_t)1<<depth) ) return;
-	if( threads<=0 ) threads = 1;
-	// The vector of per-depth node spans
-	std::pair< int , int > span( _sliceStart[depth][ std::max< int >( 0 , offset-1 ) ] , _sliceStart[depth][ std::min< int >( (size_t)1<<depth , offset+1 ) ] );
-	sData.nodeOffset = span.first;
-	sData.nodeCount = span.second - span.first;
-
-	DeletePointer( sData._cMap ) ; DeletePointer( sData._eMap ) ; DeletePointer( sData._fMap );
-	DeletePointer( sData.cTable ) ; DeletePointer( sData.eTable ) ; DeletePointer( sData.fTable );
-	if( sData.nodeCount )
-	{
-		sData._cMap = NewPointer< int >( sData.nodeCount * Square::CORNERS );
-		sData._eMap = NewPointer< int >( sData.nodeCount * Square::EDGES );
-		sData._fMap = NewPointer< int >( sData.nodeCount * Square::FACES );
-		sData.cTable = NewPointer< typename SortedTreeNodes::SquareCornerIndices >( sData.nodeCount );
-		sData.eTable = NewPointer< typename SortedTreeNodes::SquareCornerIndices >( sData.nodeCount );
-		sData.fTable = NewPointer< typename SortedTreeNodes::SquareFaceIndices >( sData.nodeCount );
-		memset( sData._cMap , 0 , sizeof(int) * sData.nodeCount * Square::CORNERS );
-		memset( sData._eMap , 0 , sizeof(int) * sData.nodeCount * Square::EDGES );
-		memset( sData._fMap , 0 , sizeof(int) * sData.nodeCount * Square::FACES );
-	}
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( depth );
-#pragma omp parallel for num_threads( threads )
-	for( int i=span.first ; i<span.second ; i++ )
-	{
-		ConstAdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* node = treeNodes[i];
-		const TreeOctNode::ConstNeighbors< 3 >& neighbors = neighborKey.getNeighbors( node );
-		int d , off[3];
-		node->depthAndOffset( d , off );
-		int z;
-		if     ( off[2]==offset-1 ) z = 1;
-		else if( off[2]==offset   ) z = 0;
-		else fprintf( stderr , "[ERROR] Node out of bounds: %d %d\n" , offset , off[2] ) , exit( 0 );
-		// Process the corners
-		for( int x=0 ; x<2 ; x++ ) for( int y=0 ; y<2 ; y++ )
-		{
-			int c = Cube::CornerIndex( x , y , z );
-			int fc = Square::CornerIndex( x , y );
-			bool cornerOwner = true;
-			int ac = Cube::AntipodalCornerIndex(c); // The index of the node relative to the corner
-			for( int cc=0 ; cc<Cube::CORNERS ; cc++ ) // Iterate over the corner's cells
-			{
-				int xx , yy , zz;
-				Cube::FactorCornerIndex( cc , xx , yy , zz );
-				xx += x , yy += y , zz += z;
-				if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) && cc<ac ){ cornerOwner = false ; break; }
-			}
-			if( cornerOwner )
-			{
-				int myCount = (i - sData.nodeOffset) * Square::CORNERS + fc;
-				sData._cMap[ myCount ] = 1;
-				for( int cc=0 ; cc<Cube::CORNERS ; cc++ )
-				{
-					int xx , yy , zz;
-					Cube::FactorCornerIndex( cc , xx , yy , zz );
-					int ac = Square::CornerIndex( 1-xx , 1-yy );
-					xx += x , yy += y , zz += z;
-					if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) ) sData.cornerIndices( neighbors.neighbors[xx][yy][zz] )[ac] = myCount;
-				}
-			}
-		}
-		// Process the edges
-		for( int o=0 ; o<2 ; o++ ) for( int y=0 ; y<2 ; y++ )
-		{
-			int fe = Square::EdgeIndex( o , y );
-			bool edgeOwner = true;
-
-			int ac = Square::AntipodalCornerIndex( Square::CornerIndex( y , z ) );
-			for( int cc=0 ; cc<Square::CORNERS ; cc++ )
-			{
-				int ii , jj , xx , yy , zz;
-				Square::FactorCornerIndex( cc , ii , jj );
-				ii += y , jj += z;
-				switch( o )
-				{
-				case 0: yy = ii , zz = jj , xx = 1 ; break;
-				case 1: xx = ii , zz = jj , yy = 1 ; break;
-				}
-				if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) && cc<ac ){ edgeOwner = false ; break; }
-			}
-			if( edgeOwner )
-			{
-				int myCount = ( i - sData.nodeOffset ) * Square::EDGES + fe;
-				sData._eMap[ myCount ] = 1;
-				// Set all edge indices
-				for( int cc=0 ; cc<Square::CORNERS ; cc++ )
-				{
-					int ii , jj , aii , ajj , xx , yy , zz;
-					Square::FactorCornerIndex( cc , ii , jj );
-					Square::FactorCornerIndex( Square::AntipodalCornerIndex( cc ) , aii , ajj );
-					ii += y , jj += z;
-					switch( o )
-					{
-					case 0: yy = ii , zz = jj , xx = 1 ; break;
-					case 1: xx = ii , zz = jj , yy = 1 ; break;
-					}
-					if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) ) sData.edgeIndices( neighbors.neighbors[xx][yy][zz] )[ Square::EdgeIndex( o , aii ) ] = myCount;
-				}
-			}
-		}
-		// Process the Faces
-		{
-			bool faceOwner = !( IsActiveNode( neighbors.neighbors[1][1][2*z] ) && !z );
-			if( faceOwner )
-			{
-				int myCount = ( i - sData.nodeOffset ) * Square::FACES;
-				sData._fMap[ myCount ] = 1;
-				// Set the face indices
-				sData.faceIndices( node )[0] = myCount;
-				if( IsActiveNode( neighbors.neighbors[1][1][2*z] ) ) sData.faceIndices( neighbors.neighbors[1][1][2*z] )[0] = myCount;
-			}
-		}
-	}
-	int cCount = 0 , eCount = 0 , fCount = 0;
-
-	for( size_t i=0 ; i<sData.nodeCount * Square::CORNERS ; i++ ) if( sData._cMap[i] ) sData._cMap[i] = cCount++;
-	for( size_t i=0 ; i<sData.nodeCount * Square::EDGES   ; i++ ) if( sData._eMap[i] ) sData._eMap[i] = eCount++;
-	for( size_t i=0 ; i<sData.nodeCount * Square::FACES   ; i++ ) if( sData._fMap[i] ) sData._fMap[i] = fCount++;
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<sData.nodeCount ; i++ )
-	{
-		for( int j=0 ; j<Square::CORNERS ; j++ ) sData.cTable[i][j] = sData._cMap[ sData.cTable[i][j] ];
-		for( int j=0 ; j<Square::EDGES   ; j++ ) sData.eTable[i][j] = sData._eMap[ sData.eTable[i][j] ];
-		for( int j=0 ; j<Square::FACES   ; j++ ) sData.fTable[i][j] = sData._fMap[ sData.fTable[i][j] ];
-	}
-
-	sData.cCount = cCount , sData.eCount = eCount , sData.fCount = fCount;
-}
-void SortedTreeNodes::setXSliceTableData( XSliceTableData& sData , int depth , int offset , int threads ) const
-{
-	typedef OctNode< TreeNodeData >::template ConstNeighborKey< 1 , 1 > ConstAdjacenctNodeKey;
-	if( offset<0 || offset>=((size_t)1<<depth) ) return;
-	if( threads<=0 ) threads = 1;
-	// The vector of per-depth node spans
-	std::pair< int , int > span( _sliceStart[depth][offset] , _sliceStart[depth][offset+1] );
-	sData.nodeOffset = span.first;
-	sData.nodeCount = span.second - span.first;
-
-	DeletePointer( sData._eMap ) ; DeletePointer( sData._fMap );
-	DeletePointer( sData.eTable ) ; DeletePointer( sData.fTable );
-	if( sData.nodeCount )
-	{
-		sData._eMap = NewPointer< int >( sData.nodeCount * Square::CORNERS );
-		sData._fMap = NewPointer< int >( sData.nodeCount * Square::EDGES );
-		sData.eTable = NewPointer< typename SortedTreeNodes::SquareCornerIndices >( sData.nodeCount );
-		sData.fTable = NewPointer< typename SortedTreeNodes::SquareEdgeIndices >( sData.nodeCount );
-		memset( sData._eMap , 0 , sizeof(int) * sData.nodeCount * Square::CORNERS );
-		memset( sData._fMap , 0 , sizeof(int) * sData.nodeCount * Square::EDGES );
-	}
-
-	std::vector< ConstAdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( depth );
-#pragma omp parallel for num_threads( threads )
-	for( int i=span.first ; i<span.second ; i++ )
-	{
-		ConstAdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* node = treeNodes[i];
-		const TreeOctNode::ConstNeighbors<3>& neighbors = neighborKey.getNeighbors( node );
-		int d , off[3];
-		node->depthAndOffset( d , off );
-		// Process the edges
-		int o=2;
-		for( int x=0 ; x<2 ; x++ ) for( int y=0 ; y<2 ; y++ )
-		{
-			int fc = Square::CornerIndex( x , y );
-			bool edgeOwner = true;
-
-			int ac = Square::AntipodalCornerIndex( Square::CornerIndex( x , y ) );
-			for( int cc=0 ; cc<Square::CORNERS ; cc++ )
-			{
-				int ii , jj , xx , yy , zz;
-				Square::FactorCornerIndex( cc , ii , jj );
-				ii += x , jj += y;
-				xx = ii , yy = jj , zz = 1;
-				if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) && cc<ac ){ edgeOwner = false ; break; }
-			}
-			if( edgeOwner )
-			{
-				int myCount = ( i - sData.nodeOffset ) * Square::CORNERS + fc;
-				sData._eMap[ myCount ] = 1;
-
-				// Set all edge indices
-				for( int cc=0 ; cc<Square::CORNERS ; cc++ )
-				{
-					int ii , jj , aii , ajj , xx , yy , zz;
-					Square::FactorCornerIndex( cc , ii , jj );
-					Square::FactorCornerIndex( Square::AntipodalCornerIndex( cc ) , aii , ajj );
-					ii += x , jj += y;
-					xx = ii , yy = jj , zz = 1;
-					if( IsActiveNode( neighbors.neighbors[xx][yy][zz] ) ) sData.edgeIndices( neighbors.neighbors[xx][yy][zz] )[ Square::CornerIndex( aii , ajj ) ] = myCount;
-				}
-			}
-		}
-		// Process the faces
-		for( int o=0 ; o<2 ; o++ ) for( int y=0 ; y<2 ; y++ )
-		{
-			bool faceOwner;
-			if( o==0 ) faceOwner = !( IsActiveNode( neighbors.neighbors[1][2*y][1] ) && !y );
-			else       faceOwner = !( IsActiveNode( neighbors.neighbors[2*y][1][1] ) && !y );
-			if( faceOwner )
-			{
-				int fe = Square::EdgeIndex( o , y );
-				int ae = Square::EdgeIndex( o , 1-y );
-				int myCount = ( i - sData.nodeOffset ) * Square::EDGES + fe;
-				sData._fMap[ myCount ] = 1;
-				// Set the face indices
-				sData.faceIndices( node )[fe] = myCount;
-				if( o==0 && IsActiveNode( neighbors.neighbors[1][2*y][1] ) ) sData.faceIndices( neighbors.neighbors[1][2*y][1] )[ae] = myCount;
-				if( o==1 && IsActiveNode( neighbors.neighbors[2*y][1][1] ) ) sData.faceIndices( neighbors.neighbors[2*y][1][1] )[ae] = myCount;
-			}
-		}
-	}
-	int eCount = 0 , fCount = 0;
-
-	for( size_t i=0 ; i<sData.nodeCount * Square::CORNERS ; i++ ) if( sData._eMap[i] ) sData._eMap[i] = eCount++;
-	for( size_t i=0 ; i<sData.nodeCount * Square::EDGES   ; i++ ) if( sData._fMap[i] ) sData._fMap[i] = fCount++;
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<sData.nodeCount ; i++ )
-	{
-		for( int j=0 ; j<Square::CORNERS ; j++ ) sData.eTable[i][j] = sData._eMap[ sData.eTable[i][j] ];
-		for( int j=0 ; j<Square::EDGES   ; j++ ) sData.fTable[i][j] = sData._fMap[ sData.fTable[i][j] ];
-	}
-
-	sData.eCount = eCount , sData.fCount = fCount;
-}
diff --git a/Src/MultiGridOctreeData.System.inl b/Src/MultiGridOctreeData.System.inl
deleted file mode 100644
index f1cea19..0000000
--- a/Src/MultiGridOctreeData.System.inl
+++ /dev/null
@@ -1,2274 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-template< class Real , int Degree , bool HasGradients >
-struct _ConstraintCalculator_
-{
-	static inline Real _CalculateConstraint_( const PointData< Real , HasGradients >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz , Real valueWeight , Real gradientWeight );
-	static inline Real _CalculateConstraint_( const PointData< Real , HasGradients >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz );
-#if POINT_DATA_RES
-	static inline void _CalculateCoarser_( int c , PointData< Real , HasGradients >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight );
-#else // !POINT_DATA_RES
-	static inline void _CalculateCoarser_( PointData< Real , HasGradients >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight );
-#endif // POINT_DATA_RES
-
-};
-template< class Real , int Degree >
-struct _ConstraintCalculator_< Real , Degree , false >
-{
-	static inline Real _CalculateConstraint_( const PointData< Real , false >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz , Real valueWeight , Real gradientWeight )
-	{
-#if POINT_DATA_RES
-		Real constraint = 0;
-		for( int c=0 ; c<PointData< Real , false >::SAMPLES ; c++ ) if( p[c].weight ) 
-		{
-			const Point3D< Real > q = p[c].position;
-			constraint += (Real)( px( q[0] ) * py( q[1] ) * pz( q[2] ) * p[c].weight * p[c].value );
-		}
-		return constraint * valueWeight;
-#else // !POINT_DATA_RES
-		const Point3D< Real > q = p.position;
-		return (Real)( px( q[0] ) * py( q[1] ) * pz( q[2] ) * p.weight * p.value ) * valueWeight;
-#endif // POINT_DATA_RES
-	}
-	static inline Real _CalculateConstraint_( const PointData< Real , false >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz )
-	{
-#if POINT_DATA_RES
-		Real constraint = 0;
-		for( int c=0 ; c<PointData< Real , false >::SAMPLES ; c++ ) if( p[c].weight ) 
-		{
-			const Point3D< Real > q = p[c].position;
-			constraint += (Real)( px( q[0] ) * py( q[1] ) * pz( q[2] ) * p[c]._value );
-		}
-		return constraint;
-#else // !POINT_DATA_RES
-		const Point3D< Real > q = p.position;
-		return (Real)( px( q[0] ) * py( q[1] ) * pz( q[2] ) * p._value );
-#endif // POINT_DATA_RES
-	}
-#if POINT_DATA_RES
-	static inline void _CalculateCoarser_( int c , PointData< Real , false >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight ){ p[c]._value = value * valueWeight * p[c].weight; }
-#else // !POINT_DATA_RES
-	static inline void _CalculateCoarser_( PointData< Real , false >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight ){ p._value = value * valueWeight * p.weight; }
-#endif // POINT_DATA_RES
-};
-template< class Real , int Degree >
-struct _ConstraintCalculator_< Real , Degree , true >
-{
-	static inline Real _CalculateConstraint_( const PointData< Real , true >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz , Real valueWeight , Real gradientWeight )
-	{
-#if POINT_DATA_RES
-		Real constraint = 0;
-		for( int c=0 ; c<PointData< Real , true >::SAMPLES ; c++ ) if( p[c].weight ) 
-		{
-			const Point3D< Real > q = p[c].position;
-			double _px = px( q[0] ) , _py = py( q[1] ) , _pz = pz( q[2] );
-			constraint +=
-				(
-					(Real)( _px * _py * _pz * p[c].value ) * valueWeight +
-					Point3D< Real >::Dot( Point3D< Real >( dpx( q[0] ) * _py * _pz , _px * dpy( q[1] ) * _pz , _px * _py * dpz( q[2] ) ) , p[c].gradient ) * gradientWeight
-				) * p[c].weight;
-		}
-		return constraint;
-#else // !POINT_DATA_RES
-		const Point3D< Real > q = p.position;
-		double _px = px( q[0] ) , _py = py( q[1] ) , _pz = pz( q[2] );
-		return
-			(
-			(Real)( _px * _py * _pz * p.value ) * valueWeight +
-				Point3D< Real >::Dot( Point3D< Real >( dpx( q[0] ) * _py * _pz , _px * dpy( q[1] ) * _pz , _px * _py * dpz( q[2] ) ) , p.gradient ) * gradientWeight
-			) * p.weight;
-#endif // POINT_DATA_RES
-	}
-	static inline Real _CalculateConstraint_( const PointData< Real , true >& p , const Polynomial< Degree >& px , const Polynomial< Degree >& py , const Polynomial< Degree >& pz , const Polynomial< Degree >& dpx , const Polynomial< Degree >& dpy , const Polynomial< Degree >& dpz )
-	{
-#if POINT_DATA_RES
-		Real constraint = 0;
-		for( int c=0 ; c<PointData< Real , true >::SAMPLES ; c++ ) if( p[c].weight ) 
-		{
-			const Point3D< Real > q = p[c].position;
-			double _px = px( q[0] ) , _py = py( q[1] ) , _pz = pz( q[2] );
-			constraint +=
-				(Real)( _px * _py * _pz * p[c]._value ) +
-				Point3D< Real >::Dot( Point3D< Real >( dpx( q[0] ) * _py * _pz , _px * dpy( q[1] ) * _pz , _px * _py * dpz( q[2] ) ) , p[c]._gradient );
-		}
-		return constraint;
-#else // !POINT_DATA_RES
-		const Point3D< Real > q = p.position;
-		double _px = px( q[0] ) , _py = py( q[1] ) , _pz = pz( q[2] );
-		return
-		(Real)( _px * _py * _pz * p._value ) +
-			Point3D< Real >::Dot( Point3D< Real >( dpx( q[0] ) * _py * _pz , _px * dpy( q[1] ) * _pz , _px * _py * dpz( q[2] ) ) , p._gradient );
-#endif // POINT_DATA_RES
-	}
-#if POINT_DATA_RES
-	static inline void _CalculateCoarser_( int c , PointData< Real , true >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight ){ p[c]._value = value * valueWeight * p[c].weight ; p[c]._gradient = gradient * gradientWeight * p[c].weight; }
-#else // !POINT_DATA_RES
-	static inline void _CalculateCoarser_( PointData< Real , true >& p , Real value , Point3D< Real > gradient , Real valueWeight , Real gradientWeight ){ p._value = value * valueWeight * p.weight ; p._gradient = gradient * gradientWeight * p.weight; }
-#endif // POINT_DATA_RES
-};
-
-template< >
-template< class I >
-double FEMSystemFunctor< 0 , BOUNDARY_FREE >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight;
-#undef D_DOT
-}
-template< >
-template< class I >
-double FEMSystemFunctor< 0 , BOUNDARY_NEUMANN >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight;
-#undef D_DOT
-}
-template< >
-template< class I >
-double FEMSystemFunctor< 0 , BOUNDARY_DIRICHLET >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight;
-#undef D_DOT
-}
-template< >
-template< class I >
-double FEMSystemFunctor< 1 , BOUNDARY_FREE >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 ) , d11[] = D_DOT( 1 , 1 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight 
-		+
-		(
-			d11[0] * d00[1] * d00[2] +
-			d11[1] * d00[2] * d00[0] +
-			d11[2] * d00[0] * d00[1]
-			) * lapWeight;
-#undef D_DOT
-}
-template< >
-template< class I >
-double FEMSystemFunctor< 1 , BOUNDARY_NEUMANN >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 ) , d11[] = D_DOT( 1 , 1 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight 
-		+
-		(
-			d11[0] * d00[1] * d00[2] +
-			d11[1] * d00[2] * d00[0] +
-			d11[2] * d00[0] * d00[1]
-			) * lapWeight;
-#undef D_DOT
-}
-template< >
-template< class I >
-double FEMSystemFunctor< 1 , BOUNDARY_DIRICHLET >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 ) , d11[] = D_DOT( 1 , 1 );
-	return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight 
-		+
-		(
-			d11[0] * d00[1] * d00[2] +
-			d11[1] * d00[2] * d00[0] +
-			d11[2] * d00[0] * d00[1]
-			) * lapWeight;
-#undef D_DOT
-}
-
-template< int FEMDegree , BoundaryType BType >
-template< class I >
-double FEMSystemFunctor< FEMDegree , BType >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , D1 , D2 ) , integrator.dot( off1[1] , off2[1] , D1 , D2 ) , integrator.dot( off1[2] , off2[2] , D1 , D2 ) }
-	double d00[] = D_DOT( 0 , 0 ) , d02[] = D_DOT( 0 , 2 ) , d20[] = D_DOT( 2 , 0 ) , d22[] = D_DOT( 2 , 2 ) , d11[] = D_DOT( 1 , 1 );
-	return
-		(
-		d00[0] * d00[1] * d00[2]
-		) * massWeight 
-		+
-		(
-		d11[0] * d00[1] * d00[2] +
-		d11[1] * d00[2] * d00[0] +
-		d11[2] * d00[0] * d00[1]
-		) * lapWeight
-		+
-		(
-		d22[0] * d00[1] * d00[2] +							// Unmixed
-		d22[1] * d00[2] * d00[0] +							// Unmixed
-		d22[2] * d00[0] * d00[1] +							// Unmixed
-		d00[0] * ( d02[1] * d20[2] + d20[1] * d02[2] ) +	//   Mixed
-		d00[1] * ( d02[2] * d20[0] + d20[2] * d02[0] ) +	//   Mixed
-		d00[2] * ( d02[0] * d20[1] + d20[0] * d02[1] )		//   Mixed
-		) * biLapWeight;
-#undef D_DOT
-}
-template< int SFDegree , BoundaryType SFBType , int FEMDegree , BoundaryType FEMBType >
-template< bool Reverse , class I >
-double FEMSFConstraintFunctor< SFDegree , SFBType , FEMDegree , FEMBType >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) , integrator.dot( off1[1] , off2[1] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) , integrator.dot( off1[2] , off2[2] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) }
-	double d00[] = D_DOT( 0 , 0 ) , d02[] = D_DOT( 0 , 2 ) , d20[] = D_DOT( 2 , 0 ) , d22[] = D_DOT( 2 , 2 ) , d11[] = D_DOT( 1 , 1 );
-	if( SFDegree==0 || FEMDegree==0 )
-		return d00[0] * d00[1] * d00[2] * massWeight;
-	else if( SFDegree<=1 || FEMDegree<=1 ) 
-		return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight 
-		+
-		(
-			d11[0] * d00[1] * d00[2] +
-			d11[1] * d00[2] * d00[0] +
-			d11[2] * d00[0] * d00[1]
-			) * lapWeight;
-	else
-		return
-		(
-			d00[0] * d00[1] * d00[2]
-			) * massWeight 
-		+
-		(
-			d11[0] * d00[1] * d00[2] +
-			d11[1] * d00[2] * d00[0] +
-			d11[2] * d00[0] * d00[1]
-			) * lapWeight
-		+
-		(
-			d22[0] * d00[1] * d00[2] +							// Unmixed
-			d22[1] * d00[2] * d00[0] +							// Unmixed
-			d22[2] * d00[0] * d00[1] +							// Unmixed
-			d00[0] * ( d02[1] * d20[2] + d20[1] * d02[2] ) +	//   Mixed
-			d00[1] * ( d02[2] * d20[0] + d20[2] * d02[0] ) +	//   Mixed
-			d00[2] * ( d02[0] * d20[1] + d20[0] * d02[1] )		//   Mixed
-			) * biLapWeight;
-#undef D_DOT
-}
-template< int VFDegree , BoundaryType VFBType , int FEMDegree , BoundaryType FEMBType >
-template< bool Reverse , class I >
-Point3D< double > FEMVFConstraintFunctor< VFDegree , VFBType , FEMDegree , FEMBType >::_integrate( const I& integrator , const int off1[] , const int off2[] ) const
-{
-#define D_DOT( D1 , D2 ) { integrator.dot( off1[0] , off2[0] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) , integrator.dot( off1[1] , off2[1] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) , integrator.dot( off1[2] , off2[2] , Reverse ? D2 : D1 , Reverse ? D1 : D2 ) }
-	if( FEMDegree==0 ) fprintf( stderr , "[ERROR] FEMDegree does not support differentiation: %d\n" , FEMDegree  ) , exit( 0 );
-	if( VFDegree==0 || FEMDegree==1 )
-	{
-		double d00[] = D_DOT( 0 , 0 ) , d01[] = D_DOT( 0 , 1 );
-		return
-			Point3D< double >
-			(
-				d01[0] * d00[1] * d00[2] ,
-				d01[1] * d00[2] * d00[0] ,
-				d01[2] * d00[0] * d00[1]
-			) * lapWeight;
-	}
-	else
-	{
-		double d00[] = D_DOT( 0 , 0 ) , d10[] = D_DOT( 1 , 0 ) , d01[] = D_DOT( 0 , 1 ) , d02[] = D_DOT( 0 , 2 ) , d12[] = D_DOT( 1 , 2 );
-		return
-			Point3D< double >
-			(
-				d01[0] * d00[1] * d00[2] ,
-				d01[1] * d00[2] * d00[0] ,
-				d01[2] * d00[0] * d00[1]
-			) * lapWeight
-			+
-			Point3D< double >
-			(
-				d12[0] * d00[1] * d00[2] + d10[0] * ( d00[1] * d02[2] + d02[1] * d00[2] ) , 
-				d12[1] * d00[2] * d00[0] + d10[1] * ( d00[2] * d02[0] + d02[2] * d00[0] ) , 
-				d12[2] * d00[0] * d00[1] + d10[2] * ( d00[0] * d02[1] + d02[0] * d00[1] )
-			) * biLapWeight;
-	}
-#undef D_DOT
-}
-
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< bool Reverse , class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralConstraintStencil( const _FEMSystemFunctor& F , const Integrator& integrator , Stencil< double , OverlapSize >& stencil  )
-{
-	int center = ( 1<<integrator.depth() )>>1;
-	int offset[] = { center , center , center };
-	for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-	{
-		int _offset[] = { x+center-OverlapEnd , y+center-OverlapEnd , z+center-OverlapEnd };
-		stencil( x , y , z ) = F.template integrate< Reverse >( integrator , _offset , offset );
-	}
-}
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< bool Reverse , class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralConstraintStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil< double , OverlapSize > stencils[2][2][2] )
-{
-	int center = ( 1<<integrator.childDepth() )>>1;
-	// [NOTE] We want the center to be at the first node of the brood
-	// Which is not the case when childDepth is 1.
-	center = ( center>>1 )<<1;
-	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ )
-	{
-		int offset[] = { center+i , center+j , center+k };
-		for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-		{
-			int _offset[] = { x+center/2-OverlapEnd , y+center/2-OverlapEnd , z+center/2-OverlapEnd };
-			stencils[i][j][k]( x , y , z ) = F.template integrate< Reverse >( integrator , _offset , offset );
-		}
-	}
-}
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< bool Reverse , class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralConstraintStencil( const _FEMSystemFunctor& F , const Integrator& integrator , Stencil< Point3D< double > , OverlapSize >& stencil  )
-{
-	int center = ( 1<<integrator.depth() )>>1;
-	int offset[] = { center , center , center };
-	for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-	{
-		int _offset[] = { x+center-OverlapEnd , y+center-OverlapEnd , z+center-OverlapEnd };
-		stencil( x , y , z ) = F.template integrate< Reverse >( integrator , _offset , offset );
-	}
-}
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< bool Reverse , class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralConstraintStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil< Point3D< double > , OverlapSize > stencils[2][2][2] )
-{
-	int center = ( 1<<integrator.childDepth() )>>1;
-	// [NOTE] We want the center to be at the first node of the brood
-	// Which is not the case when childDepth is 1.
-	center = ( center>>1 )<<1;
-	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ )
-	{
-		int offset[] = { center+i , center+j , center+k };
-		for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-		{
-			int _offset[] = { x+center/2-OverlapEnd , y+center/2-OverlapEnd , z+center/2-OverlapEnd };
-			stencils[i][j][k]( x , y , z ) = F.template integrate< Reverse >( integrator , _offset , offset );
-		}
-	}
-}
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralSystemStencil( const _FEMSystemFunctor& F , const Integrator& integrator , Stencil< double , OverlapSize >& stencil )
-{
-	int center = ( 1<<integrator.depth() )>>1;
-	int offset[] = { center , center , center };
-	for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-	{
-		int _offset[] = { x+center-OverlapEnd , y+center-OverlapEnd , z+center-OverlapEnd };
-		stencil( x , y , z ) = F.integrate( integrator , _offset , offset );
-	}
-}
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-template< class _FEMSystemFunctor >
-void SystemCoefficients< Degree1 , BType1 , Degree2 , BType2 >::SetCentralSystemStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil< double , OverlapSize > stencils[2][2][2] )
-{
-	int center = ( 1<<integrator.childDepth() )>>1;
-	// [NOTE] We want the center to be at the first node of the brood
-	// Which is not the case when childDepth is 1.
-	center = ( center>>1 )<<1;
-	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ )
-	{
-		int offset[] = { center+i , center+j , center+k };
-		for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-		{
-			int _offset[] = { x+center/2-OverlapEnd , y+center/2-OverlapEnd , z+center/2-OverlapEnd };
-			stencils[i][j][k]( x , y , z ) = F.integrate( integrator , _offset , offset );
-		}
-	}
-}
-
-template< class Real >
-template< int FEMDegree >
-void Octree< Real >::_setMultiColorIndices( int start , int end , std::vector< std::vector< int > >& indices ) const
-{
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-
-	const int modulus = OverlapRadius+1;
-	indices.resize( modulus*modulus*modulus );
-	int count[modulus*modulus*modulus];
-	memset( count , 0 , sizeof(int)*modulus*modulus*modulus );
-#pragma omp parallel for num_threads( threads )
-	for( int i=start ; i<end ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		// [NOTE] We have to use the global offset so that it's positive
-		int d , off[3];
-		_sNodes.treeNodes[i]->depthAndOffset( d , off );
-		int idx = (modulus*modulus) * ( off[2]%modulus ) + modulus * ( off[1]%modulus ) + ( off[0]%modulus );
-#pragma omp atomic
-		count[idx]++;
-	}
-
-	for( int i=0 ; i<modulus*modulus*modulus ; i++ ) indices[i].reserve( count[i] ) , count[i]=0;
-	for( int i=start ; i<end ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		int d , off[3];
-		_sNodes.treeNodes[i]->depthAndOffset( d , off );
-		int idx = (modulus*modulus) * ( off[2]%modulus ) + modulus * ( off[1]%modulus ) + ( off[0]%modulus );
-		indices[idx].push_back( i - start );
-	}
-}
-
-template< class Real >
-template< class C , int FEMDegree , BoundaryType BType >
-void Octree< Real >::_downSample( LocalDepth highDepth , DenseNodeData< C , FEMDegree >& constraints ) const
-{
-	typedef typename TreeOctNode::NeighborKey< -BSplineSupportSizes< FEMDegree >::UpSampleStart , BSplineSupportSizes< FEMDegree >::UpSampleEnd > UpSampleKey;
-
-	LocalDepth lowDepth = highDepth-1;
-	if( lowDepth<0 ) return;
-
-	typename BSplineEvaluationData< FEMDegree , BType >::UpSampleEvaluator upSampleEvaluator;
-	BSplineEvaluationData< FEMDegree , BType >::SetUpSampleEvaluator( upSampleEvaluator , lowDepth );
-	std::vector< UpSampleKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
-
-	Stencil< double , BSplineSupportSizes< FEMDegree >::UpSampleSize > upSampleStencil;
-	int lowCenter = ( 1<<lowDepth )>>1;
-	for( int i=0 ; i<BSplineSupportSizes< FEMDegree >::UpSampleSize ; i++ ) for( int j=0 ; j<BSplineSupportSizes< FEMDegree >::UpSampleSize ; j++ ) for( int k=0 ; k<BSplineSupportSizes< FEMDegree >::UpSampleSize ; k++ )
-		upSampleStencil( i , j , k ) =
-		upSampleEvaluator.value( lowCenter , 2*lowCenter + i + BSplineSupportSizes< FEMDegree >::UpSampleStart ) *
-		upSampleEvaluator.value( lowCenter , 2*lowCenter + j + BSplineSupportSizes< FEMDegree >::UpSampleStart ) *
-		upSampleEvaluator.value( lowCenter , 2*lowCenter + k + BSplineSupportSizes< FEMDegree >::UpSampleStart );
-
-	// Iterate over all (valid) parent nodes
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		TreeOctNode* pNode = _sNodes.treeNodes[i];
-
-		UpSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( pNode , d , off );
-
-		neighborKey.template getNeighbors< false >( pNode );
-
-		// Get the child neighbors
-		typename TreeOctNode::Neighbors< BSplineSupportSizes< FEMDegree >::UpSampleSize > neighbors;
-		neighborKey.template getChildNeighbors< false >( 0 , _localToGlobal( d ) , neighbors );
-
-		C& coarseConstraint = constraints[i];
-
-		// Want to make sure test if contained children are interior.
-		// This is more conservative because we are test that overlapping children are interior
-		bool isInterior = _isInteriorlyOverlapped< FEMDegree , FEMDegree >( pNode );
-		if( isInterior )
-		{
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::UpSampleSize ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::UpSampleSize ; jj++ ) for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::UpSampleSize ; kk++ )
-			{
-				const TreeOctNode* cNode = neighbors.neighbors[ii][jj][kk];
-				if( IsActiveNode( cNode ) ) coarseConstraint += (C)( constraints[ cNode->nodeData.nodeIndex ] * upSampleStencil( ii , jj , kk ) );
-			}
-		}
-		else
-		{
-			double upSampleValues[3][ BSplineSupportSizes< FEMDegree >::UpSampleSize ];
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::UpSampleSize ; ii++ )
-			{
-				upSampleValues[0][ii] = upSampleEvaluator.value( off[0] , 2*off[0] + ii + BSplineSupportSizes< FEMDegree >::UpSampleStart );
-				upSampleValues[1][ii] = upSampleEvaluator.value( off[1] , 2*off[1] + ii + BSplineSupportSizes< FEMDegree >::UpSampleStart );
-				upSampleValues[2][ii] = upSampleEvaluator.value( off[2] , 2*off[2] + ii + BSplineSupportSizes< FEMDegree >::UpSampleStart );
-			}
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::UpSampleSize ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::UpSampleSize ; jj++ )
-			{
-				double dxy = upSampleValues[0][ii] * upSampleValues[1][jj];
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::UpSampleSize ; kk++ )
-				{
-					const TreeOctNode* cNode = neighbors.neighbors[ii][jj][kk];
-					if( _isValidFEMNode( cNode ) ) coarseConstraint += (C)( constraints[ cNode->nodeData.nodeIndex ] * dxy * upSampleValues[2][kk] );
-				}
-			}
-		}
-	}
-}
-template< class Real >
-template< class C , int FEMDegree , BoundaryType BType >
-void Octree< Real >::_upSample( LocalDepth highDepth , DenseNodeData< C , FEMDegree >& coefficients ) const
-{
-	static const int  LeftDownSampleRadius = -( ( BSplineSupportSizes< FEMDegree >::DownSample0Start < BSplineSupportSizes< FEMDegree >::DownSample1Start ) ? BSplineSupportSizes< FEMDegree >::DownSample0Start : BSplineSupportSizes< FEMDegree >::DownSample1Start );
-	static const int RightDownSampleRadius =  ( ( BSplineSupportSizes< FEMDegree >::DownSample0End   > BSplineSupportSizes< FEMDegree >::DownSample1End   ) ? BSplineSupportSizes< FEMDegree >::DownSample0End   : BSplineSupportSizes< FEMDegree >::DownSample1End   );
-	typedef TreeOctNode::NeighborKey< LeftDownSampleRadius , RightDownSampleRadius > DownSampleKey;
-
-	LocalDepth lowDepth = highDepth-1;
-	if( lowDepth<0 ) return;
-
-	typename BSplineEvaluationData< FEMDegree , BType >::UpSampleEvaluator upSampleEvaluator;
-	BSplineEvaluationData< FEMDegree , BType >::SetUpSampleEvaluator( upSampleEvaluator , lowDepth );
-	std::vector< DownSampleKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
-	
-	static const int DownSampleSize = BSplineSupportSizes< FEMDegree >::DownSample0Size > BSplineSupportSizes< FEMDegree >::DownSample1Size ? BSplineSupportSizes< FEMDegree >::DownSample0Size : BSplineSupportSizes< FEMDegree >::DownSample1Size;
-	Stencil< double , DownSampleSize > downSampleStencils[ Cube::CORNERS ];
-	int lowCenter = ( 1<<lowDepth )>>1;
-	for( int c=0 ; c<Cube::CORNERS ; c++ )
-	{
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( c , cx , cy , cz );
-		for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ )
-			for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-					downSampleStencils[c]( ii , jj , kk ) = 
-					upSampleEvaluator.value( lowCenter + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] , 2*lowCenter + cx ) *
-					upSampleEvaluator.value( lowCenter + jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] , 2*lowCenter + cy ) *
-					upSampleEvaluator.value( lowCenter + kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] , 2*lowCenter + cz ) ;
-	}
-
-	// For Dirichlet constraints, can't get to all children from parents because boundary nodes are invalid
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		TreeOctNode *cNode = _sNodes.treeNodes[i] , *pNode = cNode->parent;
-		int c = (int)( cNode-pNode->children );
-
-		DownSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( pNode , d , off );
-		typename TreeOctNode::Neighbors< LeftDownSampleRadius + RightDownSampleRadius + 1 >& neighbors = neighborKey.template getNeighbors< false >( pNode );
-
-		// Want to make sure test if contained children are interior.
-		// This is more conservative because we are test that overlapping children are interior
-		bool isInterior = _isInteriorlyOverlapped< FEMDegree , FEMDegree >( pNode );
-
-		C& fineCoefficient = coefficients[ cNode->nodeData.nodeIndex ];
-
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( c , cx , cy , cz );
-
-		if( isInterior )
-		{
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-			{
-				int _ii = ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] + LeftDownSampleRadius;
-				int _jj = jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] + LeftDownSampleRadius;
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-				{
-					int _kk = kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] + LeftDownSampleRadius;
-					const TreeOctNode* _pNode = neighbors.neighbors[_ii][_jj][_kk];
-					if( _pNode ) fineCoefficient += (C)( coefficients[ _pNode->nodeData.nodeIndex ] * downSampleStencils[c]( ii , jj , kk ) );
-				}
-			}
-		}
-		else
-		{
-			double downSampleValues[3][ BSplineSupportSizes< FEMDegree >::DownSample0Size > BSplineSupportSizes< FEMDegree >::DownSample1Size ? BSplineSupportSizes< FEMDegree >::DownSample0Size : BSplineSupportSizes< FEMDegree >::DownSample1Size ];
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) downSampleValues[0][ii] = upSampleEvaluator.value( off[0] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] , 2*off[0] + cx );
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; ii++ ) downSampleValues[1][ii] = upSampleEvaluator.value( off[1] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] , 2*off[1] + cy );
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; ii++ ) downSampleValues[2][ii] = upSampleEvaluator.value( off[2] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] , 2*off[2] + cz );
-
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-			{
-				double dxy = downSampleValues[0][ii] * downSampleValues[1][jj];
-				int _ii = ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] + LeftDownSampleRadius;
-				int _jj = jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] + LeftDownSampleRadius;
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-				{
-					int _kk = kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] + LeftDownSampleRadius;
-					const TreeOctNode* _pNode = neighbors.neighbors[_ii][_jj][_kk];
-					if( _isValidFEMNode( _pNode ) ) fineCoefficient += (C)( coefficients[ _pNode->nodeData.nodeIndex ] * dxy * downSampleValues[2][kk] );
-				}
-			}
-		}
-	}
-}
-
-template< class Real >
-template< class C , int FEMDegree , BoundaryType BType >
-void Octree< Real >::_UpSample( LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients , int threads )
-{
-	static const int  LeftDownSampleRadius = -( ( BSplineSupportSizes< FEMDegree >::DownSample0Start < BSplineSupportSizes< FEMDegree >::DownSample1Start ) ? BSplineSupportSizes< FEMDegree >::DownSample0Start : BSplineSupportSizes< FEMDegree >::DownSample1Start );
-	static const int RightDownSampleRadius =  ( ( BSplineSupportSizes< FEMDegree >::DownSample0End   > BSplineSupportSizes< FEMDegree >::DownSample1End   ) ? BSplineSupportSizes< FEMDegree >::DownSample0End   : BSplineSupportSizes< FEMDegree >::DownSample1End   );
-	typedef TreeOctNode::NeighborKey< LeftDownSampleRadius , RightDownSampleRadius > DownSampleKey;
-
-	LocalDepth lowDepth = highDepth - 1;
-	if( lowDepth<0 ) return;
-
-	typename BSplineEvaluationData< FEMDegree , BType >::UpSampleEvaluator upSampleEvaluator;
-	BSplineEvaluationData< FEMDegree , BType >::SetUpSampleEvaluator( upSampleEvaluator , lowDepth );
-	std::vector< DownSampleKey > neighborKeys( std::max< int >( 1 , threads ) );
-
-	static const int DownSampleSize = BSplineSupportSizes< FEMDegree >::DownSample0Size > BSplineSupportSizes< FEMDegree >::DownSample1Size ? BSplineSupportSizes< FEMDegree >::DownSample0Size : BSplineSupportSizes< FEMDegree >::DownSample1Size;
-	Stencil< double , DownSampleSize > downSampleStencils[ Cube::CORNERS ];
-	int lowCenter = ( 1<<lowDepth )>>1;
-	for( int c=0 ; c<Cube::CORNERS ; c++ )
-	{
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( c , cx , cy , cz );
-		static const int DownSampleSize = BSplineSupportSizes< FEMDegree >::DownSample0Size > BSplineSupportSizes< FEMDegree >::DownSample1Size ? BSplineSupportSizes< FEMDegree >::DownSample0Size : BSplineSupportSizes< FEMDegree >::DownSample1Size;
-		for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ )
-			for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-					downSampleStencils[c]( ii , jj , kk ) = 
-					upSampleEvaluator.value( lowCenter + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] , 2*lowCenter + cx ) *
-					upSampleEvaluator.value( lowCenter + jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] , 2*lowCenter + cy ) *
-					upSampleEvaluator.value( lowCenter + kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] , 2*lowCenter + cz ) ;
-	}
-	int  lowBegin = _BSplineBegin< FEMDegree , BType >(  lowDepth ) ,  lowEnd = _BSplineEnd< FEMDegree , BType >(  lowDepth );
-	int highBegin = _BSplineBegin< FEMDegree , BType >( highDepth ) , highEnd = _BSplineEnd< FEMDegree , BType >( highDepth );
-	int lowDim = lowEnd - lowBegin , highDim = highEnd - highBegin;
-	// Iterate over all child nodes. (This is required since there can be child nodes whose parent is inactive.)
-#pragma omp parallel for num_threads( threads )
-	for( int k=0 ; k<highDim ; k++ ) for( int j=0 ; j<highDim ; j++ ) for( int i=0 ; i<highDim ; i++ )
-	{
-		DownSampleKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		LocalOffset off , _off;
-		off[0] = i + highBegin , off[1] = j + highBegin , off[2] = k + highBegin;
-		int highIdx = i + j * highDim  + k * highDim * highDim;
-		_off[0] = off[0]>>1 , _off[1] = off[1]>>1 , _off[2] = off[2]>>1;
-
-		// Want to make sure test if contained children are interior.
-		// This is more conservative because we are test that overlapping children are interior
-		bool isInterior = _IsInteriorlyOverlapped< FEMDegree , FEMDegree >( lowDepth , _off );
-		int cx = off[0]&1 , cy = off[1]&1 , cz = off[2]&1;
-		int c = Cube::CornerIndex( cx , cy , cz );
-
-		C& highCoefficient = highCoefficients[ highIdx ];
-
-		if( isInterior )
-		{
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-			{
-				int _i = _off[0] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] - lowBegin;
-				int _j = _off[1] + jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] - lowBegin;
-				for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-				{
-					int _k = _off[2] + kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] - lowBegin;
-					highCoefficient += (C)( lowCoefficients[ _i + _j*lowDim  + _k*lowDim*lowDim ] * downSampleStencils[c]( ii , jj , kk ) );
-				}
-			}
-		}
-		else
-		{
-			double downSampleValues[3][ BSplineSupportSizes< FEMDegree >::DownSample0Size > BSplineSupportSizes< FEMDegree >::DownSample1Size ? BSplineSupportSizes< FEMDegree >::DownSample0Size : BSplineSupportSizes< FEMDegree >::DownSample1Size ];
-
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) downSampleValues[0][ii] = upSampleEvaluator.value( _off[0] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] , off[0] );
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; ii++ ) downSampleValues[1][ii] = upSampleEvaluator.value( _off[1] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] , off[1] );
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; ii++ ) downSampleValues[2][ii] = upSampleEvaluator.value( _off[2] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] , off[2] );
-
-			for( int ii=0 ; ii<BSplineSupportSizes< FEMDegree >::DownSampleSize[cx] ; ii++ ) for( int jj=0 ; jj<BSplineSupportSizes< FEMDegree >::DownSampleSize[cy] ; jj++ )
-			{
-				double dxy = downSampleValues[0][ii] * downSampleValues[1][jj];
-				int _i = _off[0] + ii + BSplineSupportSizes< FEMDegree >::DownSampleStart[cx] - lowBegin;
-				int _j = _off[1] + jj + BSplineSupportSizes< FEMDegree >::DownSampleStart[cy] - lowBegin;
-				if( _i>=0 && _i<lowDim && _j>=0 && _j<lowDim )
-					for( int kk=0 ; kk<BSplineSupportSizes< FEMDegree >::DownSampleSize[cz] ; kk++ )
-					{
-						int _k = _off[2] + kk + BSplineSupportSizes< FEMDegree >::DownSampleStart[cz] - lowBegin;
-						if( _k>=0 && _k<lowDim ) highCoefficient += (C)( lowCoefficients[ _i + _j*lowDim  + _k*lowDim*lowDim ] * dxy * downSampleValues[2][kk] );
-					}
-			}
-		}
-	}
-}
-
-template< class Real >
-template< class C , int FEMDegree , BoundaryType BType >
-DenseNodeData< C , FEMDegree > Octree< Real >::coarseCoefficients( const DenseNodeData< C , FEMDegree >& coefficients ) const
-{
-	DenseNodeData< Real , FEMDegree > coarseCoefficients( _sNodesEnd(_maxDepth-1) );
-	memset( &coarseCoefficients[0] , 0 , sizeof(Real)*_sNodesEnd(_maxDepth-1) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth-1) ; i++ ) coarseCoefficients[i] = coefficients[i];
-	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample< C , FEMDegree , BType >( d , coarseCoefficients );
-	return coarseCoefficients;
-}
-template< class Real >
-template< class C , int FEMDegree , BoundaryType BType >
-DenseNodeData< C , FEMDegree > Octree< Real >::coarseCoefficients( const SparseNodeData< C , FEMDegree >& coefficients ) const
-{
-	DenseNodeData< Real , FEMDegree > coarseCoefficients( _sNodesEnd(_maxDepth-1) );
-	memset( &coarseCoefficients[0] , 0 , sizeof(Real)*_sNodesEnd(_maxDepth-1) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth-1) ; i++ )
-	{
-		const C* c = coefficients( _sNodes.treeNodes[i] );
-		if( c ) coarseCoefficients[i] = *c;
-	}
-	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample< C , FEMDegree , BType >( d , coarseCoefficients );
-	return coarseCoefficients;
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-Real Octree< Real >::_coarserFunctionValue( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* pointNode , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	double pointValue = 0;
-	LocalDepth depth = _localDepth( pointNode );
-	if( depth<0 ) return (Real)0.;
-
-	// Iterate over all basis functions that overlap the point at the coarser resolution
-	{
-		const typename TreeOctNode::Neighbors< SupportSize >& neighbors = neighborKey.neighbors[ _localToGlobal( depth-1 ) ];
-		LocalDepth _d ; LocalOffset _off;
-		_localDepthAndOffset( pointNode->parent , _d , _off );
-		int fStart , fEnd;
-		BSplineData< FEMDegree , BType >::FunctionSpan( _d , fStart , fEnd );
-
-		double pointValues[ DIMENSION ][SupportSize];
-		memset( pointValues , 0 , sizeof(double) * DIMENSION * SupportSize );
-
-		for( int dd=0 ; dd<DIMENSION ; dd++ ) for( int i=-LeftPointSupportRadius ; i<=RightPointSupportRadius ; i++ )
-		{
-			int fIdx = BSplineData< FEMDegree , BType >::FunctionIndex( _d , _off[dd]+i );
-			if( fIdx>=fStart && fIdx<fEnd ) pointValues[dd][i+LeftPointSupportRadius] = bsData.baseBSplines[ fIdx ][LeftSupportRadius-i]( p[dd] );
-		}
-
-		for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			double xyValue = pointValues[0][j] * pointValues[1][k];
-			double _pointValue = 0;
-			for( int l=0 ; l<SupportSize ; l++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[j][k][l];
-				if( _isValidFEMNode( _node ) ) _pointValue += pointValues[2][l] * double( upSampledCoefficients[_node->nodeData.nodeIndex] );
-			}
-			pointValue += _pointValue * xyValue;
-		}
-	}
-	return Real( pointValue );
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-Point3D< Real > Octree< Real >::_coarserFunctionGradient( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* pointNode , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int  LeftPointSupportRadius =   BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = - BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	Point3D< double > pointGradient;
-	LocalDepth depth = _localDepth( pointNode );
-	if( depth<=0 ) return Real(0.);
-
-	// Iterate over all basis functions that overlap the point at the coarser resolution
-	{
-		const typename TreeOctNode::Neighbors< SupportSize >& neighbors = neighborKey.neighbors[ _localToGlobal( depth-1 ) ];
-		LocalDepth _d ; LocalOffset _off;
-		_localDepthAndOffset( pointNode->parent , _d , _off );
-		int fStart , fEnd;
-		BSplineData< FEMDegree , BType >::FunctionSpan( _d , fStart , fEnd );
-
-		double _pointValues[ DIMENSION ][SupportSize] , dPointValues[ DIMENSION ][SupportSize];
-		memset( _pointValues , 0 , sizeof(double) * DIMENSION * SupportSize );
-		memset( dPointValues , 0 , sizeof(double) * DIMENSION * SupportSize );
-
-		for( int dd=0 ; dd<DIMENSION ; dd++ ) for( int i=-LeftPointSupportRadius ; i<=RightPointSupportRadius ; i++ )
-		{
-			int fIdx = BSplineData< FEMDegree , BType >::FunctionIndex( _d , _off[dd]+i );
-			if( fIdx>=fStart && fIdx<fEnd )
-			{
-				_pointValues[dd][i+LeftPointSupportRadius] = bsData.baseBSplines[ fIdx ][LeftSupportRadius-i]( p[dd] );
-				dPointValues[dd][i+LeftPointSupportRadius] = bsData.dBaseBSplines[ fIdx ][LeftSupportRadius-i]( p[dd] );
-			}
-		}
-
-		for( int j=0 ; j<SupportSize ; j++ ) for( int k=0 ; k<SupportSize ; k++ )
-		{
-			double _x_yValue = _pointValues[0][j] * _pointValues[1][k];
-			double dx_yValue = dPointValues[0][j] * _pointValues[1][k];
-			double _xdyValue = _pointValues[0][j] * dPointValues[1][k];
-			double __pointValue = 0 , _dPointValue = 0;
-			for( int l=0 ; l<SupportSize ; l++ )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[j][k][l];
-				if( _isValidFEMNode( _node ) )
-				{
-					__pointValue += _pointValues[2][l] * double( upSampledCoefficients[_node->nodeData.nodeIndex] );
-					_dPointValue += dPointValues[2][l] * double( upSampledCoefficients[_node->nodeData.nodeIndex] );
-				}
-			}
-
-			pointGradient += Point3D< double >( __pointValue * dx_yValue , __pointValue * _xdyValue , _dPointValue * _x_yValue );
-		}
-	}
-	return Point3D< Real >( pointGradient );
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-Real Octree< Real >::_finerFunctionValue( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* pointNode , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& finerCoefficients ) const
-{
-	typename TreeOctNode::Neighbors< BSplineSupportSizes< FEMDegree >::SupportSize > childNeighbors;
-	static const int  LeftPointSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int  LeftSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-
-	double pointValue = 0;
-	LocalDepth depth = _localDepth( pointNode );
-	neighborKey.template getChildNeighbors< false >( _childIndex( pointNode , p ) , _localToGlobal( depth ) , childNeighbors );
-	for( int j=-LeftPointSupportRadius ; j<=RightPointSupportRadius ; j++ )
-		for( int k=-LeftPointSupportRadius ; k<=RightPointSupportRadius ; k++ )
-			for( int l=-LeftPointSupportRadius ; l<=RightPointSupportRadius ; l++ )
-			{
-				const TreeOctNode* _node = childNeighbors.neighbors[j+LeftPointSupportRadius][k+LeftPointSupportRadius][l+LeftPointSupportRadius];
-				if( _isValidFEMNode( _node ) )
-				{
-					int fIdx[3];
-					functionIndex< FEMDegree , BType >( _node , fIdx );
-					pointValue += 
-						bsData.baseBSplines[ fIdx[0] ][LeftSupportRadius-j]( p[0] ) *
-						bsData.baseBSplines[ fIdx[1] ][LeftSupportRadius-k]( p[1] ) *
-						bsData.baseBSplines[ fIdx[2] ][LeftSupportRadius-l]( p[2] ) *
-						double( finerCoefficients[ _node->nodeData.nodeIndex ] );
-				}
-			}
-	return Real( pointValue );
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-Point3D< Real > Octree< Real >::_finerFunctionGradient( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* pointNode , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& finerCoefficients ) const
-{
-	typename TreeOctNode::Neighbors< BSplineSupportSizes< FEMDegree >::SupportSize > childNeighbors;
-	static const int  LeftPointSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int  LeftSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-
-	Point3D< double > pointGradient = 0;
-	LocalDepth depth = _localDepth( pointNode );
-	neighborKey.template getChildNeighbors< false >( _childIndex( pointNode , p ) , _localToGlobal( depth ) , childNeighbors );
-	for( int j=-LeftPointSupportRadius ; j<=RightPointSupportRadius ; j++ )
-		for( int k=-LeftPointSupportRadius ; k<=RightPointSupportRadius ; k++ )
-			for( int l=-LeftPointSupportRadius ; l<=RightPointSupportRadius ; l++ )
-			{
-				const TreeOctNode* _node = childNeighbors.neighbors[j+LeftPointSupportRadius][k+LeftPointSupportRadius][l+LeftPointSupportRadius];
-				if( _isValidFEMNode( _node ) )
-				{
-					int fIdx[3];
-					functionIndex< FEMDegree , BType >( _node , fIdx );
-					double  x = bsData. baseBSplines[ fIdx[0] ][LeftSupportRadius-j]( p[0] ) ,  y = bsData. baseBSplines[ fIdx[1] ][LeftSupportRadius-k]( p[1] ) ,  z = bsData. baseBSplines[ fIdx[2] ][LeftSupportRadius-l]( p[2] );
-					double dx = bsData.dBaseBSplines[ fIdx[0] ][LeftSupportRadius-j]( p[0] ) , dy = bsData.dBaseBSplines[ fIdx[1] ][LeftSupportRadius-k]( p[1] ) , dz = bsData.dBaseBSplines[ fIdx[2] ][LeftSupportRadius-l]( p[2] );
-					pointGradient += Point3D< double >( dx * y * z , x * dy * z , x * y * dz ) * (double)( finerCoefficients[ _node->nodeData.nodeIndex ] );
-				}
-			}
-	return Point3D< Real >( pointGradient );
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , bool HasGradients >
-void Octree< Real >::_setPointValuesFromCoarser( InterpolationInfo< HasGradients >& interpolationInfo , LocalDepth highDepth , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients )
-{
-	LocalDepth lowDepth = highDepth-1;
-	if( lowDepth<0 ) return;
-	std::vector< PointSupportKey< FEMDegree > > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
-
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		PointSupportKey< FEMDegree >& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		PointData< Real , HasGradients >* pData = interpolationInfo( _sNodes.treeNodes[i] );
-		if( pData )
-		{
-			neighborKey.template getNeighbors< false >( _sNodes.treeNodes[i]->parent );
-#if POINT_DATA_RES
-			for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ ) if( (*pData)[c].weight )
-				_ConstraintCalculator_< Real , FEMDegree , HasGradients >::_CalculateCoarser_
-				(
-					c , *pData ,
-					_coarserFunctionValue( (*pData)[c].position , neighborKey , _sNodes.treeNodes[i] , bsData , upSampledCoefficients ) ,
-					HasGradients ? _coarserFunctionGradient( (*pData)[c].position , neighborKey , _sNodes.treeNodes[i] , bsData , upSampledCoefficients ) : Point3D< Real >() ,
-					interpolationInfo.valueWeight , interpolationInfo.gradientWeight 
-				);
-#else // !POINT_DATA_RES
-			_ConstraintCalculator_< Real , FEMDegree , HasGradients >::_CalculateCoarser_
-			(
-				*pData ,
-				_coarserFunctionValue( pData->position , neighborKey , _sNodes.treeNodes[i] , bsData , upSampledCoefficients ) ,
-				HasGradients ? _coarserFunctionGradient( pData->position , neighborKey , _sNodes.treeNodes[i] , bsData , upSampledCoefficients ) : Point3D< Real >() ,
-				interpolationInfo.valueWeight , interpolationInfo.gradientWeight 
-			);
-#endif // POINT_DATA_RES
-		}
-	}
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , bool HasGradients >
-void Octree< Real >::_updateCumulativeInterpolationConstraintsFromFiner( const InterpolationInfo< HasGradients >& interpolationInfo , const BSplineData< FEMDegree , BType >& bsData , LocalDepth highDepth , const DenseNodeData< Real , FEMDegree >& finerCoefficients , DenseNodeData< Real , FEMDegree >& coarserConstraints ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int  LeftPointSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int  LeftSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-
-	// Note: We can't iterate over the finer point nodes as the point weights might be
-	// scaled incorrectly, due to the adaptive exponent. So instead, we will iterate
-	// over the coarser nodes and evaluate the finer solution at the associated points.
-	LocalDepth  lowDepth = highDepth-1;
-	if( lowDepth<0 ) return;
-	size_t start = _sNodesBegin(lowDepth) , end = _sNodesEnd(lowDepth);
-	std::vector< PointSupportKey< FEMDegree > > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( lowDepth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(lowDepth) ; i<_sNodesEnd(lowDepth) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-	{
-		PointSupportKey< FEMDegree >& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		const PointData< Real , HasGradients >* pData = interpolationInfo( _sNodes.treeNodes[i] );
-		if( pData )
-		{
-			typename TreeOctNode::Neighbors< SupportSize >& neighbors = neighborKey.template getNeighbors< false >( _sNodes.treeNodes[i] );
-			// evaluate the solution @( depth ) at the current point @( depth-1 )
-#if POINT_DATA_RES
-			for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ ) if( (*pData)[c].weight )
-#endif // POINT_DATA_RES
-			{
-#if POINT_DATA_RES
-				Real finerPointDValue = _finerFunctionValue( (*pData)[c].position , neighborKey , _sNodes.treeNodes[i] , bsData , finerCoefficients ) * interpolationInfo.valueWeight * (*pData)[c].weight;
-				Point3D< Real > finerPointDGradient = HasGradients ? _finerFunctionGradient( (*pData)[c].position , neighborKey , _sNodes.treeNodes[i] , bsData , finerCoefficients ) * interpolationInfo.gradientWeight * (*pData)[c].weight : Point3D< Real >();
-				Point3D< Real > p = (*pData)[c].position;
-#else // !POINT_DATA_RES
-				Real finerPointDValue = _finerFunctionValue( pData->position , neighborKey , _sNodes.treeNodes[i] , bsData , finerCoefficients ) * interpolationInfo.valueWeight * pData->weight;
-				Point3D< Real > finerPointDGradient = HasGradients ? _finerFunctionGradient( pData->position , neighborKey , _sNodes.treeNodes[i] , bsData , finerCoefficients ) * interpolationInfo.gradientWeight * pData->weight : Point3D< Real >();
-				Point3D< Real > p = pData->position;
-#endif // POINT_DATA_RES
-				// Update constraints for all nodes @( depth-1 ) that overlap the point
-				int idx[3];
-				functionIndex< FEMDegree , BType >( _sNodes.treeNodes[i] , idx );
-				for( int x=-LeftPointSupportRadius ; x<=RightPointSupportRadius ; x++ ) for( int y=-LeftPointSupportRadius ; y<=RightPointSupportRadius ; y++ ) for( int z=-LeftPointSupportRadius ; z<=RightPointSupportRadius ; z++ )
-				{
-					const TreeOctNode* _node = neighbors.neighbors[x+LeftPointSupportRadius][y+LeftPointSupportRadius][z+LeftPointSupportRadius];
-					if( _isValidFEMNode( _node ) )
-					{
-						double px = bsData.baseBSplines[idx[0]+x][LeftSupportRadius-x]( p[0] ) , py = bsData.baseBSplines[idx[1]+y][LeftSupportRadius-y]( p[1] ) , pz = bsData.baseBSplines[idx[2]+z][LeftSupportRadius-z]( p[2] );
-#pragma omp atomic
-						coarserConstraints[ _node->nodeData.nodeIndex ] += (Real)( px * py * pz * finerPointDValue );
-						if( HasGradients )
-						{
-							double dpx = bsData.dBaseBSplines[idx[0]+x][LeftSupportRadius-x]( p[0] ) , dpy = bsData.dBaseBSplines[idx[1]+y][LeftSupportRadius-y]( p[1] ) , dpz = bsData.dBaseBSplines[idx[2]+z][LeftSupportRadius-z]( p[2] );
-#pragma omp atomic
-							coarserConstraints[ _node->nodeData.nodeIndex ] += Point3D< Real >::Dot( finerPointDGradient , Point3D< Real >( dpx * py * pz , px * dpy * pz , px * py * dpz ) );
-						}
-					}
-				}
-			}
-		}
-	}
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-int Octree< Real >::_setMatrixRow( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors , Pointer( MatrixEntry< Real > ) row , int offset , const typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , const Stencil< double , BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& stencil , const BSplineData< FEMDegree , BType >& bsData ) const
-{
-	static const int SupportSize = BSplineSupportSizes< FEMDegree >::SupportSize;
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-	static const int OverlapSize   =   BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-	static const int LeftSupportRadius  = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int LeftPointSupportRadius  = BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int RightPointSupportRadius = -BSplineSupportSizes< FEMDegree >::SupportStart;
-
-	bool hasYZPoints[SupportSize] , hasZPoints[SupportSize][SupportSize];
-	Real diagonal = 0;
-	// Given a node:
-	// -- for each node in its support:
-	// ---- if the supporting node contains a point:
-	// ------ evaluate the x, y, and z B-splines of the nodes supporting the point
-	// splineValues \in [-LeftSupportRadius,RightSupportRadius] x [-LeftSupportRadius,RightSupportRadius] x [-LeftSupportRadius,RightSupportRadius] x [0,Dimension) x [-LeftPointSupportRadius,RightPointSupportRadius]
-#if POINT_DATA_RES
-	Real _splineValues[PointData< Real , HasGradients >::SAMPLES][SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	Real wSplineValues[PointData< Real , HasGradients >::SAMPLES][SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	Real dSplineValues[PointData< Real , HasGradients >::SAMPLES][SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	memset( _splineValues , 0 , sizeof( Real ) * PointData< Real , HasGradients >::SAMPLES * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-	memset( wSplineValues , 0 , sizeof( Real ) * PointData< Real , HasGradients >::SAMPLES * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-	memset( dSplineValues , 0 , sizeof( Real ) * PointData< Real , HasGradients >::SAMPLES * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-#else // !POINT_DATA_RES
-	Real _splineValues[SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	Real wSplineValues[SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	Real dSplineValues[SupportSize][SupportSize][SupportSize][DIMENSION][SupportSize];
-	memset( _splineValues , 0 , sizeof( Real ) * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-	memset( wSplineValues , 0 , sizeof( Real ) * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-	memset( dSplineValues , 0 , sizeof( Real ) * SupportSize * SupportSize * SupportSize * DIMENSION *SupportSize );
-#endif // NEW_POINT_DATA
-
-	int count = 0;
-	const TreeOctNode* node = neighbors.neighbors[OverlapRadius][OverlapRadius][OverlapRadius];
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-	int fStart , fEnd;
-	BSplineData< FEMDegree , BType >::FunctionSpan( d , fStart , fEnd );
-	bool isInterior = _isInteriorlyOverlapped< FEMDegree , FEMDegree >( node );
-
-	if( interpolationInfo )
-	{
-		// Iterate over all neighboring nodes that may have a constraining point
-		// -- For each one, compute the values of the spline functions supported on the point
-		for( int j=0 ; j<SupportSize ; j++ )
-		{
-			hasYZPoints[j] = false;
-			for( int k=0 ; k<SupportSize ; k++ ) hasZPoints[j][k] = false;
-		}
-		for( int j=-LeftSupportRadius , jj=0 ; j<=RightSupportRadius ; j++ , jj++ )
-			for( int k=-LeftSupportRadius , kk=0 ; k<=RightSupportRadius ; k++ , kk++ )
-				for( int l=-LeftSupportRadius , ll=0 ; l<=RightSupportRadius ; l++ , ll++ )
-				{
-					const TreeOctNode* _node = neighbors.neighbors[OverlapRadius+j][OverlapRadius+k][OverlapRadius+l];
-					if( _isValidSpaceNode( _node ) && (*interpolationInfo)( _node ) )
-					{
-						int pOff[] = { off[0]+j , off[1]+k , off[2]+l };
-						hasYZPoints[jj] = hasZPoints[jj][kk] = true;
-						const PointData< Real , HasGradients >& pData = *( (*interpolationInfo)( _node ) );
-
-#if POINT_DATA_RES
-						for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ ) if( pData[c].weight )
-#endif // POINT_DATA_RES
-						{
-#if POINT_DATA_RES
-							Real (*__splineValues)[SupportSize] = _splineValues[c][jj][kk][ll];
-							Real (*_wSplineValues)[SupportSize] = wSplineValues[c][jj][kk][ll];
-							Real (*_dSplineValues)[SupportSize] = dSplineValues[c][jj][kk][ll];
-							Real weight = pData[c].weight;
-							Point3D< Real > p = pData[c].position;
-#else // !POINT_DATA_RES
-							Real (*__splineValues)[SupportSize] = _splineValues[jj][kk][ll];
-							Real (*_wSplineValues)[SupportSize] = wSplineValues[jj][kk][ll];
-							Real (*_dSplineValues)[SupportSize] = dSplineValues[jj][kk][ll];
-							Real weight = pData.weight;
-							Point3D< Real > p = pData.position;
-#endif // POINT_DATA_RES
-
-							// evaluate the point p at all the nodes whose functions have it in their support
-							for( int s=-LeftPointSupportRadius ; s<=RightPointSupportRadius ; s++ ) for( int dd=0 ; dd<DIMENSION ; dd++ )
-							{
-								int fIdx = BSplineData< FEMDegree , BType >::FunctionIndex( d , pOff[dd]+s );
-								if( fIdx>=fStart && fIdx<fEnd )
-								{
-									_wSplineValues[dd][ s+LeftPointSupportRadius ] = __splineValues[dd][ s+LeftPointSupportRadius ] = Real( bsData.baseBSplines[ fIdx ][ -s+LeftSupportRadius ]( p[dd] ) );
-									if( HasGradients ) _dSplineValues[dd][ s+LeftPointSupportRadius ] = Real( bsData.dBaseBSplines[ fIdx ][ -s+LeftSupportRadius ]( p[dd] ) );
-								}
-							}
-							// The value of the function of the node that we started with
-							Real value = __splineValues[0][-j+LeftPointSupportRadius] * __splineValues[1][-k+LeftPointSupportRadius] * __splineValues[2][-l+LeftPointSupportRadius];
-							Real weightedValue = value * interpolationInfo->valueWeight * weight;
-							Point3D< Real > weightedGradient;
-							if( HasGradients )
-							{
-								Point3D< Real > gradient
-									(
-									_dSplineValues[0][-j+LeftPointSupportRadius] * __splineValues[1][-k+LeftPointSupportRadius] * __splineValues[2][-l+LeftPointSupportRadius] ,
-									__splineValues[0][-j+LeftPointSupportRadius] * _dSplineValues[1][-k+LeftPointSupportRadius] * __splineValues[2][-l+LeftPointSupportRadius] ,
-									__splineValues[0][-j+LeftPointSupportRadius] * __splineValues[1][-k+LeftPointSupportRadius] * _dSplineValues[2][-l+LeftPointSupportRadius]
-									);
-								weightedGradient = gradient * interpolationInfo->gradientWeight * weight;
-								diagonal += value * weightedValue + Point3D< Real >::Dot( gradient , weightedGradient );
-							}
-							else diagonal += value * weightedValue;
-
-							// Pre-multiply the x-coordinate values so that when we evaluate at one of the neighboring basis functions
-							// we get the product of the values of the center base function and the base function of the neighboring node
-							if( HasGradients ) for( int s=0 ; s<SupportSize ; s++ ) _wSplineValues[0][s] *= weightedValue , _dSplineValues[0][s] *= weightedGradient[0] , _dSplineValues[1][s] *= weightedGradient[1] , _dSplineValues[2][s] *= weightedGradient[2];
-							else               for( int s=0 ; s<SupportSize ; s++ ) _wSplineValues[0][s] *= weightedValue;
-						}
-					}
-				}
-	}
-
-	Real pointValues[OverlapSize][OverlapSize][OverlapSize];
-	if( interpolationInfo )
-	{
-		memset( pointValues , 0 , sizeof(Real) * OverlapSize * OverlapSize * OverlapSize );
-		// Iterate over all supported neighbors that could have a point constraint	
-		for( int i=-LeftSupportRadius ; i<=RightSupportRadius ; i++ ) if( hasYZPoints[i+LeftSupportRadius] )
-			for( int j=-LeftSupportRadius ; j<=RightSupportRadius ; j++ ) if( hasZPoints[i+LeftSupportRadius][j+LeftSupportRadius] )
-				for( int k=-LeftSupportRadius ; k<=RightSupportRadius ; k++ )
-				{
-					const TreeOctNode* _node = neighbors.neighbors[i+OverlapRadius][j+OverlapRadius][k+OverlapRadius];
-					if( _isValidSpaceNode( _node ) && (*interpolationInfo)( _node ) )
-					{
-						const PointData< Real , HasGradients >& pData = *( (*interpolationInfo)( _node ) );
-#if POINT_DATA_RES
-						for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ ) if( pData[c].weight )
-#endif // POINT_DATA_RES
-						{
-#if POINT_DATA_RES
-							Real (*__splineValues)[SupportSize] = _splineValues[c][i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-							Real (*_wSplineValues)[SupportSize] = wSplineValues[c][i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-							Real (*_dSplineValues)[SupportSize] = dSplineValues[c][i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-#else // !POINT_DATA_RES
-							Real (*__splineValues)[SupportSize] = _splineValues[i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-							Real (*_wSplineValues)[SupportSize] = wSplineValues[i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-							Real (*_dSplineValues)[SupportSize] = dSplineValues[i+LeftSupportRadius][j+LeftSupportRadius][k+LeftSupportRadius];
-#endif // POINT_DATA_RES
-							// Iterate over all neighbors whose support contains the point and accumulate the mutual integral
-							for( int ii=-LeftPointSupportRadius ; ii<=RightPointSupportRadius ; ii++ )
-								for( int jj=-LeftPointSupportRadius ; jj<=RightPointSupportRadius ; jj++ )
-									if( HasGradients )
-									{
-										Real partialW_SplineValue = _wSplineValues[0][ii+LeftPointSupportRadius ] * __splineValues[1][jj+LeftPointSupportRadius ];
-										Real partial__SplineValue = __splineValues[0][ii+LeftPointSupportRadius ] * __splineValues[1][jj+LeftPointSupportRadius ];
-										Real partialD0SplineValue = _dSplineValues[0][ii+LeftPointSupportRadius ] * __splineValues[1][jj+LeftPointSupportRadius ];
-										Real partialD1SplineValue = __splineValues[0][ii+LeftPointSupportRadius ] * _dSplineValues[1][jj+LeftPointSupportRadius ];
-										Real* _pointValues = pointValues[i+ii+OverlapRadius][j+jj+OverlapRadius] + k + OverlapRadius;
-										Real* ___splineValues = __splineValues[2] + LeftPointSupportRadius;
-										Real* __dSplineValues = _dSplineValues[2] + LeftPointSupportRadius;
-										TreeOctNode* const * _neighbors = neighbors.neighbors[i+ii+OverlapRadius][j+jj+OverlapRadius] + k + OverlapRadius;
-										for( int kk=-LeftPointSupportRadius ; kk<=RightPointSupportRadius ; kk++ ) if( _isValidFEMNode( _neighbors[kk] ) )
-											_pointValues[kk] +=
-												partialW_SplineValue * ___splineValues[kk] + partialD0SplineValue * ___splineValues[kk] + partialD1SplineValue * ___splineValues[kk] + partial__SplineValue * __dSplineValues[kk];
-									}
-									else
-									{
-										Real partialWSplineValue = _wSplineValues[0][ii+LeftPointSupportRadius ] * __splineValues[1][jj+LeftPointSupportRadius ];
-										Real* _pointValues = pointValues[i+ii+OverlapRadius][j+jj+OverlapRadius] + k + OverlapRadius;
-										Real* ___splineValues = __splineValues[2] + LeftPointSupportRadius;
-										TreeOctNode* const * _neighbors = neighbors.neighbors[i+ii+OverlapRadius][j+jj+OverlapRadius] + k + OverlapRadius;
-										for( int kk=-LeftPointSupportRadius ; kk<=RightPointSupportRadius ; kk++ ) if( _isValidFEMNode( _neighbors[kk] ) )
-											_pointValues[kk] += partialWSplineValue * ___splineValues[kk];
-									}
-						}
-					}
-				}
-	}
-	pointValues[OverlapRadius][OverlapRadius][OverlapRadius] = diagonal;
-	int nodeIndex = neighbors.neighbors[OverlapRadius][OverlapRadius][OverlapRadius]->nodeData.nodeIndex;
-	if( isInterior ) // General case, so try to make fast
-	{
-		const TreeOctNode* const * _nodes = &neighbors.neighbors[0][0][0];
-		const double* _stencil = &stencil( 0 , 0 , 0 );
-		Real* _values = &pointValues[0][0][0];
-		const static int CenterIndex = OverlapSize*OverlapSize*OverlapRadius + OverlapSize*OverlapRadius + OverlapRadius;
-		if( interpolationInfo ) for( int i=0 ; i<OverlapSize*OverlapSize*OverlapSize ; i++ ) _values[i] = Real( _stencil[i] + _values[i] );
-		else                    for( int i=0 ; i<OverlapSize*OverlapSize*OverlapSize ; i++ ) _values[i] = Real( _stencil[i] );
-
-		row[count++] = MatrixEntry< Real >( nodeIndex-offset , _values[CenterIndex] );
-		for( int i=0 ; i<OverlapSize*OverlapSize*OverlapSize ; i++ ) if( i!=CenterIndex && _isValidFEMNode( _nodes[i] ) )
-			row[count++] = MatrixEntry< Real >( _nodes[i]->nodeData.nodeIndex-offset , _values[i] );
-	}
-	else
-	{
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		Real temp = (Real)F.integrate( integrator , off , off );
-		if( interpolationInfo ) temp += pointValues[OverlapRadius][OverlapRadius][OverlapRadius];
-		row[count++] = MatrixEntry< Real >( nodeIndex-offset , temp );
-		for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-			if( (x!=OverlapRadius || y!=OverlapRadius || z!=OverlapRadius) && _isValidFEMNode( neighbors.neighbors[x][y][z] ) )
-			{
-				const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-				LocalDepth _d ; LocalOffset _off;
-				_localDepthAndOffset( _node , _d , _off );
-				Real temp = (Real)F.integrate( integrator , _off , off );
-				if( interpolationInfo ) temp += pointValues[x][y][z];
-				row[count++] = MatrixEntry< Real >( _node->nodeData.nodeIndex-offset , temp );
-			}
-	}
-	return count;
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-int Octree< Real >::_getMatrixAndUpdateConstraints( const FEMSystemFunctor& F , const InterpolationInfo<  HasGradients >* interpolationInfo , SparseMatrix< Real >& matrix , DenseNodeData< Real , FEMDegree >& constraints , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const BSplineData< FEMDegree , BType >& bsData , LocalDepth depth , const DenseNodeData< Real , FEMDegree >& metSolution , bool coarseToFine )
-{
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-	static const int OverlapSize   =   BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-
-	size_t start = _sNodesBegin(depth) , end = _sNodesEnd(depth) , range = end-start;
-	Stencil< double , OverlapSize > stencil , stencils[2][2][2];
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencil ( F ,      integrator , stencil  );
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencils( F , childIntegrator , stencils );
-	matrix.Resize( (int)range );
-	std::vector< AdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<(int)range ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i+start] ) )
-	{
-		AdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* node = _sNodes.treeNodes[i+start];
-		// Get the matrix row size
-		typename TreeOctNode::Neighbors< OverlapSize > neighbors;
-		neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( node , neighbors );
-		int count = _getMatrixRowSize< FEMDegree , BType >( neighbors );
-		// Allocate memory for the row
-		matrix.SetRowSize( i , count );
-
-		// Set the row entries
-		matrix.rowSizes[i] = _setMatrixRow( F , interpolationInfo , neighbors , matrix[i] , (int)start , integrator , stencil , bsData );
-		if( coarseToFine && depth>0 )
-		{
-			// Offset the constraints using the solution from lower resolutions.
-			int x , y , z;
-			Cube::FactorCornerIndex( (int)( node - node->parent->children ) , x , y , z );
-			typename TreeOctNode::Neighbors< OverlapSize > pNeighbors;
-			neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( node->parent , pNeighbors );
-			_updateConstraintsFromCoarser( F , interpolationInfo , neighbors , pNeighbors , node , constraints , metSolution , childIntegrator , stencils[x][y][z] , bsData );
-		}
-	}
-	memoryUsage();
-	return 1;
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-int Octree< Real >::_getSliceMatrixAndUpdateConstraints( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , SparseMatrix< Real >& matrix , DenseNodeData< Real , FEMDegree >& constraints , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const BSplineData< FEMDegree , BType >& bsData , LocalDepth depth , int slice , const DenseNodeData< Real , FEMDegree >& metSolution , bool coarseToFine )
-{
-	static const int OverlapSize   =  BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-	static const int OverlapRadius = -BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-
-	int nStart = _sNodesBegin( depth , slice ) , nEnd = _sNodesEnd( depth , slice );
-	size_t range = nEnd - nStart;
-	Stencil< double , OverlapSize > stencil , stencils[2][2][2];
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencil ( F ,      integrator , stencil  );
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencils( F , childIntegrator , stencils );
-
-	matrix.Resize( (int)range );
-	std::vector< AdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<(int)range ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i+nStart] ) )
-	{
-		AdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* node = _sNodes.treeNodes[i+nStart];
-		// Get the matrix row size
-		typename TreeOctNode::Neighbors< OverlapSize > neighbors;
-		neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( node , neighbors );
-		int count = _getMatrixRowSize< FEMDegree , BType >( neighbors );
-
-		// Allocate memory for the row
-		matrix.SetRowSize( i , count );
-
-		// Set the row entries
-		matrix.rowSizes[i] = _setMatrixRow( F , interpolationInfo , neighbors , matrix[i] , _sNodesBegin( depth , slice ) , integrator , stencil , bsData );
-
-		if( coarseToFine && depth>0 )
-		{
-			// Offset the constraints using the solution from lower resolutions.
-			int x , y , z;
-			Cube::FactorCornerIndex( (int)( node - node->parent->children ) , x , y , z );
-			typename TreeOctNode::Neighbors< OverlapSize > pNeighbors;
-			neighborKey.template getNeighbors< false, OverlapRadius , OverlapRadius >( node->parent , pNeighbors );
-			_updateConstraintsFromCoarser( F , interpolationInfo , neighbors , pNeighbors , node , constraints , metSolution , childIntegrator , stencils[x][y][z] , bsData );
-		}
-	}
-#if !defined( _WIN32 ) && !defined( _WIN64 )
-#pragma message( "[WARNING] I'm not sure how expensive this system call is on non-Windows system. (You may want to comment this out.)" )
-#endif // !_WIN32 && !_WIN64
-	memoryUsage();
-	return 1;
-}
-
-#ifndef MOD
-#define MOD( a , b ) ( (a)>0 ? (a) % (b) : ( (b) - ( -(a) % (b) ) ) % (b) )
-#endif // MOD
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-int Octree< Real >::_solveSystemGS( const FEMSystemFunctor& F , const BSplineData< FEMDegree , BType >& bsData , InterpolationInfo< HasGradients >* interpolationInfo , LocalDepth depth , DenseNodeData< Real , FEMDegree >& solution , DenseNodeData< Real , FEMDegree >& constraints , DenseNodeData< Real , FEMDegree >& metSolutionConstraints , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms )
-{
-	const int OverlapRadius = -BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template      Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >      integrator;
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) > childIntegrator;
-	BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetIntegrator( integrator , depth );
-	if( depth>0 ) BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetChildIntegrator( childIntegrator , depth-1 );
-
-	DenseNodeData< Real , FEMDegree >& metSolution    = metSolutionConstraints;	// This stores the up-sampled solution up to depth-2
-	DenseNodeData< Real , FEMDegree >& metConstraints = metSolutionConstraints; // This stores the down-sampled constraints up to depth
-
-	int sliceBegin = _BSplineBegin< FEMDegree , BType >( depth ) , sliceEnd = _BSplineEnd< FEMDegree , BType >( depth );
-	double&   systemTime = stats.  systemTime;
-	double&    solveTime = stats.   solveTime;
-	double& evaluateTime = stats.evaluateTime;
-	systemTime = solveTime = evaluateTime = 0.;
-
-	if( coarseToFine )
-	{
-		if( depth>0 )
-		{
-			// Up-sample the cumulative change in solution @(depth-2) into the cumulative change in solution @(depth-1)
-			if( depth-2>=0 ) _upSample< Real , FEMDegree , BType >( depth-1 , metSolution );
-			// Add in the change in solution @(depth-1)
-#pragma omp parallel for num_threads( threads )
-			for( int i=_sNodesBegin(depth-1) ; i<_sNodesEnd(depth-1) ; i++ ) metSolution[i] += solution[i];
-			// evaluate the points @(depth) using the cumulative change in solution @(depth-1)
-			if( interpolationInfo )
-			{
-				evaluateTime = Time();
-				_setPointValuesFromCoarser( *interpolationInfo , depth , bsData , metSolution );
-				evaluateTime = Time() - evaluateTime;
-			}
-		}
-	}
-	else if( depth<_maxDepth ) for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ ) constraints[i] -= metConstraints[i];
-	double bNorm = 0 , inRNorm = 0 , outRNorm = 0;
-	if( depth>=0 )
-	{
-		// Add padding space if we are computing residuals
-		int frontOffset = computeNorms ? OverlapRadius : 0 , backOffset = computeNorms ? OverlapRadius : 0;
-		// Set the number of in-memory slices required for a temporally blocked solver
-		int solveSlices = std::max< int >( 0 , std::min< int >( OverlapRadius*iters - (OverlapRadius-1) , sliceEnd-sliceBegin ) ) , matrixSlices = std::max< int >( 1 , std::min< int >( solveSlices+frontOffset+backOffset , sliceEnd-sliceBegin ) );
-		// The list of matrices for each in-memory slices
-		std::vector< SparseMatrix< Real > > _M( matrixSlices );
-		// The list of multi-colored indices  for each in-memory slice
-		std::vector< std::vector< std::vector< int > > > __mcIndices( solveSlices );
-
-		int dir = coarseToFine ? -1 : 1 , start = coarseToFine ? sliceEnd-1 : sliceBegin , end = coarseToFine ? sliceBegin-1 : sliceEnd;
-		for( int frontSlice=start-frontOffset*dir , backSlice = frontSlice-OverlapRadius*(iters-1)*dir ; backSlice!=end+backOffset*dir ; frontSlice+=dir , backSlice+=dir )
-		{
-			double t;
-			if( frontSlice+frontOffset*dir>=sliceBegin && frontSlice+frontOffset*dir<sliceEnd )
-			{
-				int s = frontSlice+frontOffset*dir , _s = MOD( s , matrixSlices );
-				t = Time();
-				// Compute the system matrix
-				_getSliceMatrixAndUpdateConstraints( F , interpolationInfo , _M[_s] , constraints , integrator , childIntegrator , bsData , depth , s , metSolution , coarseToFine );
-				systemTime += Time()-t;
-				// Compute residuals
-				if( computeNorms )
-				{
-					ConstPointer( Real ) B = GetPointer( &constraints[0] + _sNodesBegin( depth ) , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-					Pointer( Real ) X = GetPointer( &solution[0] + _sNodesBegin( depth ) , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-#pragma omp parallel for num_threads( threads ) reduction( + : bNorm , inRNorm )
-					for( int j=0 ; j<_M[_s].rows ; j++ )
-					{
-						Real temp = Real(0);
-						ConstPointer( MatrixEntry< Real > ) start = _M[_s][j];
-						ConstPointer( MatrixEntry< Real > ) end = start + _M[_s].rowSizes[j];
-						ConstPointer( MatrixEntry< Real > ) e;
-						for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
-						bNorm += B[j]*B[j];
-						inRNorm += (temp-B[j]) * (temp-B[j]);
-					}
-				}
-			}
-			t = Time();
-			// Compute the multicolor indices
-			if( iters && frontSlice>=sliceBegin && frontSlice<sliceEnd )
-			{
-				int s = frontSlice , _s = MOD( s , matrixSlices ) , __s = MOD( s , solveSlices );
-				for( int i=0 ; i<int( __mcIndices[__s].size() ) ; i++ ) __mcIndices[__s][i].clear();
-				_setMultiColorIndices< FEMDegree >( _sNodesBegin( depth , s ) , _sNodesEnd( depth , s ) , __mcIndices[__s] );
-			}
-			// Advance through the in-memory slices, taking an appropriately sized stride
-			for( int slice=frontSlice ; slice*dir>=backSlice*dir ; slice-=OverlapRadius*dir )
-				if( slice>=sliceBegin && slice<sliceEnd )
-				{
-					int s = slice , _s = MOD( s , matrixSlices ) , __s = MOD( s , solveSlices );
-					// Do the GS solver
-					ConstPointer( Real ) B = GetPointer( &constraints[0] + _sNodesBegin( depth)  , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-					Pointer( Real ) X = GetPointer( &solution[0] + _sNodesBegin( depth ) , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-					SparseMatrix< Real >::SolveGS( __mcIndices[__s] , _M[_s] , B , X , !coarseToFine , threads );
-				}
-			solveTime += Time() - t;
-			// Compute residuals
-			if( computeNorms && backSlice-backOffset*dir>=sliceBegin && backSlice-backOffset*dir<sliceEnd )
-			{
-				int s = backSlice-backOffset*dir , _s = MOD( s , matrixSlices );
-				ConstPointer( Real ) B = GetPointer( &constraints[0] + _sNodesBegin( depth ) , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-				Pointer( Real ) X = GetPointer( &solution[0] + _sNodesBegin( depth ) , _sNodesSize( depth ) ) + ( _sNodesBegin( depth , s ) - _sNodesBegin( depth ) );
-#pragma omp parallel for num_threads( threads ) reduction( + : outRNorm )
-				for( int j=0 ; j<_M[_s].rows ; j++ )
-				{
-					Real temp = Real(0);
-					ConstPointer( MatrixEntry< Real > ) start = _M[_s][j];
-					ConstPointer( MatrixEntry< Real > ) end = start + _M[_s].rowSizes[j];
-					ConstPointer( MatrixEntry< Real > ) e;
-					for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
-					outRNorm += (temp-B[j]) * (temp-B[j]);
-				}
-			}
-		}
-	}
-	if( computeNorms ) stats.bNorm2 = bNorm , stats.inRNorm2 = inRNorm , stats.outRNorm2 = outRNorm;
-
-	if( !coarseToFine && depth>0 )
-	{
-		// Explicitly compute the restriction of the met solution onto the coarser nodes
-		// and down-sample the previous accumulation
-		{
-			_updateCumulativeIntegralConstraintsFromFiner( F , bsData , depth , solution , metConstraints );
-			if( interpolationInfo ) _updateCumulativeInterpolationConstraintsFromFiner( *interpolationInfo , bsData , depth , solution , metConstraints );
-			if( depth<_maxDepth ) _downSample< Real , FEMDegree , BType >( depth , metConstraints );
-		}
-	}
-	memoryUsage();
-
-	return iters;
-}
-#undef MOD
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-int Octree< Real >::_solveSystemCG( const FEMSystemFunctor& F , const BSplineData< FEMDegree , BType >& bsData , InterpolationInfo< HasGradients >* interpolationInfo , LocalDepth depth , DenseNodeData< Real , FEMDegree >& solution , DenseNodeData< Real , FEMDegree >& constraints , DenseNodeData< Real , FEMDegree >& metSolutionConstraints , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms , double accuracy )
-{
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template      Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >      integrator;
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) > childIntegrator;
-	BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetIntegrator( integrator , depth );
-	if( depth>0 ) BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetChildIntegrator( childIntegrator , depth-1 );
-
-	DenseNodeData< Real , FEMDegree >& metSolution    = metSolutionConstraints;	// This stores the up-sampled solution up to depth-2
-	DenseNodeData< Real , FEMDegree >& metConstraints = metSolutionConstraints; // This stores the down-sampled constraints up to depth
-
-	int iter = 0;
-	Pointer( Real ) X = GetPointer( &   solution[0] + _sNodesBegin(depth) , _sNodesSize(depth) );
-	Pointer( Real ) B = GetPointer( &constraints[0] + _sNodesBegin(depth) , _sNodesSize(depth) );
-	SparseMatrix< Real > M;
-	double&   systemTime = stats.  systemTime;
-	double&    solveTime = stats.   solveTime;
-	double& evaluateTime = stats.evaluateTime;
-	systemTime = solveTime = evaluateTime = 0.;
-
-	if( coarseToFine )
-	{
-		if( depth>0 )
-		{
-			// Up-sample the cumulative change in solution @(depth-2) into the cumulative change in solution @(depth-1)
-			if( depth-2>=0 ) _upSample< Real , FEMDegree , BType >( depth-1 , metSolution );
-			// Add in the change in solution @(depth-1)
-#pragma omp parallel for num_threads( threads )
-			for( int i=_sNodesBegin(depth-1) ; i<_sNodesEnd(depth-1) ; i++ ) metSolution[i] += solution[i];
-			// evaluate the points @(depth) using the cumulative change in solution @(depth-1)
-			if( interpolationInfo )
-			{
-				evaluateTime = Time();
-				_setPointValuesFromCoarser( *interpolationInfo , depth , bsData , metSolution );
-				evaluateTime = Time() - evaluateTime;
-			}
-		}
-	}
-	else if( depth<_maxDepth ) for( int i=_sNodesBegin(depth) ; i<_sNodesEnd(depth) ; i++ ) constraints[i] -= metConstraints[i];
-
-	// Get the system matrix (and adjust the right-hand-side based on the coarser solution if prolonging)
-	systemTime = Time();
-	_getMatrixAndUpdateConstraints( F , interpolationInfo , M , constraints , integrator , childIntegrator , bsData , depth , metSolution , coarseToFine );
-	systemTime = Time()-systemTime;
-
-	solveTime = Time();
-	// Solve the linear system
-	accuracy = Real( accuracy / 100000 ) * M.rows;
-	int dim = _BSplineEnd< FEMDegree , BType >( depth ) - _BSplineBegin< FEMDegree , BType >( depth );
-	int nonZeroRows = 0;
-	for( int i=0 ; i<M.rows ; i++ ) if( M.rowSizes[i] ) nonZeroRows++;
-	bool addDCTerm = ( nonZeroRows==dim*dim*dim && ( !interpolationInfo || !interpolationInfo->valueWeight ) && HasPartitionOfUnity< BType >() && F.vanishesOnConstants() );
-	double bNorm = 0 , inRNorm = 0 , outRNorm = 0;
-	if( computeNorms )
-	{
-#pragma omp parallel for num_threads( threads ) reduction( + : bNorm , inRNorm )
-		for( int j=0 ; j<M.rows ; j++ )
-		{
-			Real temp = Real(0);
-			ConstPointer( MatrixEntry< Real > ) start = M[j];
-			ConstPointer( MatrixEntry< Real > ) end = start + M.rowSizes[j];
-			ConstPointer( MatrixEntry< Real > ) e;
-			for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
-			bNorm += B[j] * B[j];
-			inRNorm += ( temp-B[j] ) * ( temp-B[j] );
-		}
-	}
-
-	iters = std::min< int >( nonZeroRows , iters );
-	if( iters ) iter += SparseMatrix< Real >::SolveCG( M , ( ConstPointer( Real ) )B , iters , X , Real( accuracy ) , 0 , addDCTerm , false , threads );
-
-	solveTime = Time()-solveTime;
-	if( computeNorms )
-	{
-#pragma omp parallel for num_threads( threads ) reduction( + : outRNorm )
-		for( int j=0 ; j<M.rows ; j++ )
-		{
-			Real temp = Real(0);
-			ConstPointer( MatrixEntry< Real > ) start = M[j];
-			ConstPointer( MatrixEntry< Real > ) end = start + M.rowSizes[j];
-			ConstPointer( MatrixEntry< Real > ) e;
-			for( e=start ; e!=end ; e++ ) temp += X[ e->N ] * e->Value;
-			outRNorm += ( temp-B[j] ) * ( temp-B[j] );
-		}
-		stats.bNorm2 = bNorm , stats.inRNorm2 = inRNorm , stats.outRNorm2 = outRNorm;
-	}
-
-	// Copy the old solution into the buffer, write in the new solution, compute the change, and update the met solution
-	if( !coarseToFine && depth>0 )
-	{
-		// Explicitly compute the restriction of the met solution onto the coarser nodes
-		// and down-sample the previous accumulation
-		{
-			_updateCumulativeIntegralConstraintsFromFiner( F , bsData , depth , solution , metConstraints );
-			if( interpolationInfo ) _updateCumulativeInterpolationConstraintsFromFiner( *interpolationInfo , bsData , depth , solution , metConstraints );
-			if( depth>_maxDepth ) _downSample< Real , FEMDegree , BType >( depth , metConstraints );
-		}
-	}
-	memoryUsage();
-	return iter;
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-int Octree< Real >::_getMatrixRowSize( const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors ) const
-{
-	static const int OverlapSize   =   BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-
-	int count = 0;
-	int nodeIndex = neighbors.neighbors[OverlapRadius][OverlapRadius][OverlapRadius]->nodeData.nodeIndex;
-	const TreeOctNode* const * _nodes = &neighbors.neighbors[0][0][0];
-	for( int i=0 ; i<OverlapSize*OverlapSize*OverlapSize ; i++ ) if( _isValidFEMNode( _nodes[i] ) ) count++;
-	return count;
-}
-
-
-template< class Real >
-template< int FEMDegree1 , int FEMDegree2 >
-void Octree< Real >::_SetParentOverlapBounds( const TreeOctNode* node , int& startX , int& endX , int& startY , int& endY , int& startZ , int& endZ )
-{
-	const int OverlapStart = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapStart;
-
-	if( node->parent )
-	{
-		int x , y , z , c = (int)( node - node->parent->children );
-		Cube::FactorCornerIndex( c , x , y , z );
-		startX = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapStart[x]-OverlapStart , endX = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapEnd[x]-OverlapStart+1;
-		startY = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapStart[y]-OverlapStart , endY = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapEnd[y]-OverlapStart+1;
-		startZ = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapStart[z]-OverlapStart , endZ = BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::ParentOverlapEnd[z]-OverlapStart+1;
-	}
-}
-
-// It is assumed that at this point, the evaluationg of the current depth's points, using the coarser resolution solution
-// has already happened
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-void Octree< Real >::_updateConstraintsFromCoarser( const FEMSystemFunctor& F , const InterpolationInfo<  HasGradients >* interpolationInfo , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& pNeighbors , TreeOctNode* node , DenseNodeData< Real , FEMDegree >& constraints , const DenseNodeData< Real , FEMDegree >& metSolution , const typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const Stencil< double , BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& lapStencil , const BSplineData< FEMDegree , BType >& bsData ) const
-{
-	static const int LeftSupportRadius  = -BSplineSupportSizes< FEMDegree >::SupportStart;
-	static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-
-	if( _localDepth( node )<=0 ) return;
-	// This is a conservative estimate as we only need to make sure that the parent nodes don't overlap the child (not the parent itself)
-	bool isInterior = _isInteriorlyOverlapped< FEMDegree , FEMDegree >( node->parent );
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-
-	// Offset the constraints using the solution from lower resolutions.
-	int startX , endX , startY , endY , startZ , endZ;
-	_SetParentOverlapBounds< FEMDegree , FEMDegree >( node , startX , endX , startY , endY , startZ , endZ );
-
-	for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-		if( _isValidFEMNode( pNeighbors.neighbors[x][y][z] ) )
-		{
-			const TreeOctNode* _node = pNeighbors.neighbors[x][y][z];
-			Real _solution = metSolution[ _node->nodeData.nodeIndex ];
-			{
-				if( isInterior ) constraints[ node->nodeData.nodeIndex ] -= Real( lapStencil( x , y , z ) * _solution );
-				else
-				{
-					LocalDepth _d ; LocalOffset _off;
-					_localDepthAndOffset( _node , _d , _off );
-					constraints[ node->nodeData.nodeIndex ] -= (Real)F.integrate( childIntegrator , _off , off ) * _solution;
-				}
-			}
-		}
-
-	if( interpolationInfo )
-	{
-		double constraint = 0;
-		int fIdx[3];
-		functionIndex< FEMDegree , BType >( node , fIdx );
-		// evaluate the current node's basis function at adjacent points
-		for( int x=-LeftSupportRadius ; x<=RightSupportRadius ; x++ ) for( int y=-LeftSupportRadius ; y<=RightSupportRadius ; y++ ) for( int z=-LeftSupportRadius ; z<=RightSupportRadius ; z++ )
-		{
-			const TreeOctNode* _node = neighbors.neighbors[x+OverlapRadius][y+OverlapRadius][z+OverlapRadius];
-			if( _isValidSpaceNode( _node ) && (*interpolationInfo)( _node ) )
-			{
-				const PointData< Real , HasGradients >& pData = *( (*interpolationInfo)( _node ) );
-				constraint += _ConstraintCalculator_< Real , FEMDegree , HasGradients >::_CalculateConstraint_
-					(
-						pData ,
-						bsData. baseBSplines[ fIdx[0] ][x+LeftSupportRadius] ,
-						bsData. baseBSplines[ fIdx[1] ][y+LeftSupportRadius] ,
-						bsData. baseBSplines[ fIdx[2] ][z+LeftSupportRadius] ,
-						bsData.dBaseBSplines[ fIdx[0] ][x+LeftSupportRadius] ,
-						bsData.dBaseBSplines[ fIdx[1] ][y+LeftSupportRadius] ,
-						bsData.dBaseBSplines[ fIdx[2] ][z+LeftSupportRadius]
-					);
-			}
-		}
-		constraints[ node->nodeData.nodeIndex ] -= Real( constraint );
-	}
-}
-
-// Given the solution @( depth ) add to the met constraints @( depth-1 )
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor >
-void Octree< Real >::_updateCumulativeIntegralConstraintsFromFiner( const FEMSystemFunctor& F , const BSplineData< FEMDegree , BType >& bsData , LocalDepth highDepth , const DenseNodeData< Real , FEMDegree >& fineSolution , DenseNodeData< Real , FEMDegree >& coarseConstraints ) const
-{
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) > childIntegrator;
-	BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetChildIntegrator( childIntegrator , highDepth-1 );
-
-	static const int OverlapSize   =   BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-	typedef typename TreeOctNode::NeighborKey< -BSplineSupportSizes< FEMDegree >::SupportStart , BSplineSupportSizes< FEMDegree >::SupportEnd >SupportKey;
-
-	if( highDepth<=0 ) return;
-	// Get the stencil describing the Laplacian relating coefficients @(depth) with coefficients @(depth-1)
-	Stencil< double , OverlapSize > stencils[2][2][2];
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencils( F , childIntegrator , stencils );
-	size_t start = _sNodesBegin( highDepth) , end = _sNodesEnd(highDepth) , range = end-start;
-	int lStart = _sNodesBegin(highDepth-1);
-
-	// Iterate over the nodes @( depth )
-	std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( highDepth )-1 );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(highDepth) ; i<_sNodesEnd(highDepth) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-		TreeOctNode* node = _sNodes.treeNodes[i];
-
-		// Offset the coarser constraints using the solution from the current resolutions.
-		int x , y , z , c;
-		c = int( node - node->parent->children );
-		Cube::FactorCornerIndex( c , x , y , z );
-		{
-			typename TreeOctNode::Neighbors< OverlapSize > pNeighbors;
-			neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( node->parent , pNeighbors );
-			const Stencil< double , OverlapSize >& stencil = stencils[x][y][z];
-
-			bool isInterior = _isInteriorlyOverlapped< FEMDegree , FEMDegree >( node->parent );
-			LocalDepth d ; LocalOffset off;
-			_localDepthAndOffset( node , d , off );
-
-			// Offset the constraints using the solution from finer resolutions.
-			int startX , endX , startY , endY , startZ , endZ;
-			_SetParentOverlapBounds< FEMDegree , FEMDegree >( node , startX , endX , startY  , endY , startZ , endZ );
-
-			Real solution = fineSolution[ node->nodeData.nodeIndex ];
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-				if( _isValidFEMNode( pNeighbors.neighbors[x][y][z] ) )
-				{
-					const TreeOctNode* _node = pNeighbors.neighbors[x][y][z];
-					if( isInterior )
-#pragma omp atomic
-						coarseConstraints[ _node->nodeData.nodeIndex ] += Real( stencil( x , y , z ) * solution );
-					else
-					{
-						LocalDepth _d ; LocalOffset _off;
-						_localDepthAndOffset( _node , _d , _off );
-#pragma omp atomic
-						coarseConstraints[ _node->nodeData.nodeIndex ] += Real( F.integrate( childIntegrator , _off , off ) * solution );
-					}
-				}
-		}
-	}
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-void Octree< Real >::setSystemMatrix( const FEMSystemFunctor& F , const InterpolationInfo<  HasGradients >* interpolationInfo , LocalDepth depth , SparseMatrix< Real >& matrix ) const
-{
-	if( depth<0 || depth>_maxDepth ) fprintf( stderr , "[ERROR] System depth out of bounds: %d <= %d <= %d\n" , 0 , depth , _maxDepth ) , exit( 0 );
-	typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) > integrator;
-	BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::SetIntegrator( integrator , depth );
-	BSplineData< FEMDegree , BType > bsData( depth );
-
-	static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-	static const int OverlapSize   =   BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-
-	Stencil< double , OverlapSize > stencil;
-	SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::SetCentralSystemStencil ( F , integrator , stencil );
-
-	matrix.Resize( _sNodesSize(depth) );
-	std::vector< AdjacenctNodeKey > neighborKeys( std::max< int >( 1 , threads ) );
-	for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( depth ) );
-#pragma omp parallel for num_threads( threads )
-	for( int i=_sNodesBegin(depth) ; i<_sNodesEnd( depth ) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-	{
-		int ii = i - _sNodesBegin(depth);
-		AdjacenctNodeKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-
-		typename TreeOctNode::Neighbors< OverlapSize > neighbors;
-		neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( _sNodes.treeNodes[i] , neighbors );
-
-		matrix.SetRowSize( ii , _getMatrixRowSize< FEMDegree , BType >( neighbors ) );
-		matrix.rowSizes[ii] = _setMatrixRow( F , interpolationInfo , neighbors , matrix[ii] , _sNodesBegin(depth) , integrator , stencil , bsData );
-	}
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-DenseNodeData< Real , FEMDegree > Octree< Real >::solveSystem( const FEMSystemFunctor& F , InterpolationInfo< HasGradients >* interpolationInfo , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxSolveDepth , const typename Octree< Real >::SolverInfo& solverInfo )
-{
-	BSplineData< FEMDegree , BType > bsData( maxSolveDepth );
-
-	maxSolveDepth = std::min< LocalDepth >( maxSolveDepth , _maxDepth );
-	int iter = 0;
-	const int _iters = std::max< int >( 0 , solverInfo.iters );
-
-	DenseNodeData< Real , FEMDegree > solution( _sNodesEnd( _maxDepth ) );
-	memset( &solution[0] , 0 , sizeof(Real) * _sNodesEnd( _maxDepth ) );
-
-	DenseNodeData< Real , FEMDegree > metSolution( _sNodesEnd( _maxDepth-1 ) );
-	memset( &metSolution[0] , 0 , sizeof(Real)*_sNodesEnd( _maxDepth-1 ) );
-	for( LocalDepth d=0 ; d<=maxSolveDepth ; d++ )
-	{
-		int iters = (int)ceil( _iters * pow( solverInfo.lowResIterMultiplier , maxSolveDepth-d ) );
-		_SolverStats sStats;
-		if( !d ) iter = _solveSystemCG( F , bsData , interpolationInfo , d , solution , constraints , metSolution , _sNodesSize(d) , true , sStats , solverInfo.showResidual , 0 );
-		else
-		{
-			if( d>solverInfo.cgDepth ) iter = _solveSystemGS( F , bsData , interpolationInfo , d , solution , constraints , metSolution , iters , true , sStats , solverInfo.showResidual );
-			else                       iter = _solveSystemCG( F , bsData , interpolationInfo , d , solution , constraints , metSolution , iters , true , sStats , solverInfo.showResidual , solverInfo.cgAccuracy );
-		}
-		int femNodes = 0;
-#pragma omp parallel for reduction( + : femNodes )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) ) femNodes++;
-		if( solverInfo.verbose )
-		{
-			if( maxSolveDepth<10 ) printf( "Depth[%d/%d]:\t" , d , maxSolveDepth );
-			else                   printf( "Depth[%2d/%d]:\t" , d , maxSolveDepth );
-			printf( "Evaluated / Got / Solved in: %6.3f / %6.3f / %6.3f\t(%.3f MB)\tNodes: %d\n" , sStats.evaluateTime , sStats.systemTime , sStats.solveTime , _localMemoryUsage , femNodes );
-		}
-		if( solverInfo.showResidual && iters )
-		{
-			for( LocalDepth dd=0 ; dd<d ; dd++ ) printf( "  " );
-			printf( "%s: %.4e -> %.4e -> %.4e (%.2e) [%d]\n" , d<=solverInfo.cgDepth ? "CG" : "GS" , sqrt( sStats.bNorm2 ) , sqrt( sStats.inRNorm2 ) , sqrt( sStats.outRNorm2 ) , sqrt( sStats.outRNorm2  / sStats.bNorm2 ) , iters );
-		}
-	}
-	memoryUsage();
-	return solution;
-}
-
-template< class Real >
-template< int FEMDegree >
-DenseNodeData< Real , FEMDegree > Octree< Real >::initDenseNodeData( void )
-{
-	DenseNodeData< Real , FEMDegree > constraints( _sNodes.size() );
-	memset( &constraints[0] , 0 , sizeof(Real)*_sNodes.size() );
-	return constraints;
-}
-template< > template< > float  Octree< float  >::_Dot( const float & r1 , const float & r2 ){ return r1*r2; }
-template< > template< > double Octree< double >::_Dot( const double& r1 , const double& r2 ){ return r1*r2; }
-template< > template< > float  Octree< float  >::_Dot( const Point3D< float  >& p1 , const Point3D< float  >& p2 ){ return Point3D< float  >::Dot( p1 , p2 ); }
-template< > template< > double Octree< double >::_Dot( const Point3D< double >& p1 , const Point3D< double >& p2 ){ return Point3D< double >::Dot( p1 , p2 ); }
-template< > template< > bool Octree< float  >::_IsZero( const float & r ){ return r==0; }
-template< > template< > bool Octree< double >::_IsZero( const double& r ){ return r==0; }
-template< > template< > bool Octree< float  >::_IsZero( const Point3D< float  >& p ){ return p[0]==0 && p[1]==0 && p[2]==0; }
-template< > template< > bool Octree< double >::_IsZero( const Point3D< double >& p ){ return p[0]==0 && p[1]==0 && p[2]==0; }
-template< class Real >
-template< int FEMDegree , BoundaryType FEMBType , int CDegree , BoundaryType CBType , class FEMConstraintFunctor , class Coefficients , class D , class _D >
-void Octree< Real >::_addFEMConstraints( const FEMConstraintFunctor& F , const Coefficients& coefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-{
-	typedef typename TreeOctNode::NeighborKey< -BSplineSupportSizes< FEMDegree >::SupportStart , BSplineSupportSizes< FEMDegree >::SupportEnd > SupportKey;
-	const int      CFEMOverlapSize   =  BSplineOverlapSizes< CDegree , FEMDegree >::OverlapSize;
-	const int  LeftCFEMOverlapRadius = -BSplineOverlapSizes< CDegree , FEMDegree >::OverlapStart;
-	const int RightCFEMOverlapRadius =  BSplineOverlapSizes< CDegree , FEMDegree >::OverlapEnd;
-	const int  LeftFEMCOverlapRadius = -BSplineOverlapSizes< FEMDegree , CDegree >::OverlapStart;
-	const int RightFEMCOverlapRadius =  BSplineOverlapSizes< FEMDegree , CDegree >::OverlapEnd;
-
-	// To set the constraints, we iterate over the
-	// splatted normals and compute the dot-product of the
-	// divergence of the normal field with all the basis functions.
-	// Within the same depth: set directly as a gather
-	// Coarser depths 
-	maxDepth = std::min< LocalDepth >( maxDepth , _maxDepth );
-	DenseNodeData< Real , FEMDegree >* __constraints = new DenseNodeData< Real , FEMDegree >( _sNodesEnd(maxDepth-1) );
-	DenseNodeData< Real , FEMDegree >& _constraints = *__constraints;
-	memset( &_constraints[0] , 0 , sizeof(Real)*( _sNodesEnd(maxDepth-1) ) );
-	memoryUsage();
-
-	for( LocalDepth d=maxDepth ; d>=0 ; d-- )
-	{
-		Stencil< _D , CFEMOverlapSize > stencil , stencils[2][2][2];
-		typename SystemCoefficients< CDegree , CBType , FEMDegree , FEMBType >::     Integrator      integrator;
-		typename SystemCoefficients< FEMDegree , FEMBType , CDegree , CBType >::ChildIntegrator childIntegrator;
-		BSplineIntegrationData< CDegree , CBType , FEMDegree , FEMBType >::SetIntegrator( integrator , d );
-		if( d>0 ) BSplineIntegrationData< FEMDegree , FEMBType , CDegree , CBType >::SetChildIntegrator( childIntegrator , d-1 );
-		SystemCoefficients< CDegree , CBType , FEMDegree , FEMBType >::template SetCentralConstraintStencil < false >( F,      integrator , stencil  );
-		SystemCoefficients< FEMDegree , FEMBType , CDegree , CBType >::template SetCentralConstraintStencils< true  >( F, childIntegrator , stencils );
-
-		std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
-
-#pragma omp parallel for num_threads( threads )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
-		{
-			SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-			TreeOctNode* node = _sNodes.treeNodes[i];
-			int startX=0 , endX=CFEMOverlapSize , startY=0 , endY=CFEMOverlapSize , startZ=0 , endZ=CFEMOverlapSize;
-			typename TreeOctNode::Neighbors< CFEMOverlapSize > neighbors;
-			neighborKey.template getNeighbors< false , LeftFEMCOverlapRadius , RightFEMCOverlapRadius >( node , neighbors );
-			bool isInterior = _isInteriorlyOverlapped< FEMDegree , CDegree >( node ) , isInterior2 = _isInteriorlyOverlapped< CDegree , FEMDegree >( node->parent );
-
-			LocalDepth d ; LocalOffset off;
-			_localDepthAndOffset( node , d , off );
-			// Set constraints from current depth
-			// Gather the constraints from the vector-field at _node into the constraint stored with node
-			if( _isValidFEMNode( node ) )
-			{
-				for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-				{
-					const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-					if( isValidFEMNode< CDegree , CBType >( _node ) )
-					{
-						const D* d = coefficients( _node );
-						if( d ) 
-							if( isInterior ) constraints[i] += _Dot( (D)stencil( x , y , z ) , *d );
-							else
-							{
-								LocalDepth _d ; LocalOffset _off;
-								_localDepthAndOffset( _node , _d , _off );
-								constraints[i] += _Dot( *d , (D)F.template integrate< false >( integrator , _off , off ) );
-							}
-					}
-				}
-				_SetParentOverlapBounds< CDegree , FEMDegree >( node , startX , endX , startY , endY , startZ , endZ );
-			}
-			if( !isValidFEMNode< CDegree , CBType >( node ) ) continue;
-			const D* _data = coefficients( node );
-			if( !_data ) continue;
-			const D& data = *_data;
-			if( _IsZero( data ) ) continue;
-
-			// Set the _constraints for the parents
-			if( d>0 )
-			{
-				int cx , cy , cz;
-				Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy ,cz );
-				const Stencil< _D , CFEMOverlapSize >& _stencil = stencils[cx][cy][cz];
-
-				neighborKey.template getNeighbors< false , LeftCFEMOverlapRadius , RightCFEMOverlapRadius >( node->parent , neighbors );
-
-				for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-				{
-					TreeOctNode* _node = neighbors.neighbors[x][y][z];
-					if( _node && ( isInterior2 || _isValidFEMNode( _node ) ) )
-					{
-						TreeOctNode* _node = neighbors.neighbors[x][y][z];
-						Real c;
-						if( isInterior2 ) c = _Dot( (D)_stencil( x , y , z ) , data );
-						else
-						{
-							LocalDepth _d ; LocalOffset _off;
-							_localDepthAndOffset( _node , _d , _off );
-							c = _Dot( data , (D)F.template integrate< true >( childIntegrator , _off , off ) );
-						}
-#pragma omp atomic
-						_constraints[ _node->nodeData.nodeIndex ] += c;
-					}
-				}
-			}
-		}
-		memoryUsage();
-	}
-
-	// Fine-to-coarse down-sampling of constraints
-	for( LocalDepth d=maxDepth-1 ; d>0 ; d-- ) _downSample< Real , FEMDegree , FEMBType >( d , _constraints );
-
-	// Add the accumulated constraints from all finer depths
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<_sNodesEnd(maxDepth-1) ; i++ ) constraints[i] += _constraints[i];
-
-	delete __constraints;
-
-	DenseNodeData< D , CDegree > _coefficients( _sNodesEnd(maxDepth-1) );
-	memset( &_coefficients[0] , 0 , sizeof(D) * _sNodesEnd(maxDepth-1) );
-	for( LocalDepth d=maxDepth-1 ; d>=0 ; d-- )
-	{
-#pragma omp parallel for num_threads( threads )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( isValidFEMNode< CDegree , CBType >( _sNodes.treeNodes[i] ) )
-		{
-			const D* d = coefficients( _sNodes.treeNodes[i] );
-			if( d )	_coefficients[i] += *d;
-		}
-	}
-
-	// Coarse-to-fine up-sampling of coefficients
-	for( LocalDepth d=1 ; d<maxDepth ; d++ ) _upSample< D , CDegree , CBType >( d , _coefficients );
-
-	// Compute the contribution from all coarser depths
-	for( LocalDepth d=1 ; d<=maxDepth ; d++ )
-	{
-		size_t start = _sNodesBegin( d ) , end = _sNodesEnd( d ) , range = end - start;
-		Stencil< _D , CFEMOverlapSize > stencils[2][2][2];
-		typename SystemCoefficients< CDegree , CBType , FEMDegree , FEMBType >::ChildIntegrator childIntegrator;
-		BSplineIntegrationData< CDegree , CBType , FEMDegree , FEMBType >::SetChildIntegrator( childIntegrator , d-1 );
-		SystemCoefficients< CDegree , CBType , FEMDegree , FEMBType >::template SetCentralConstraintStencils< false >( F , childIntegrator , stencils );
-		std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) );
-#pragma omp parallel for num_threads( threads )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-		{
-			SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-			TreeOctNode* node = _sNodes.treeNodes[i];
-			int startX , endX , startY , endY , startZ , endZ;
-			_SetParentOverlapBounds< FEMDegree , CDegree >( node , startX , endX , startY , endY , startZ , endZ );
-			typename TreeOctNode::Neighbors< CFEMOverlapSize > pNeighbors;
-			neighborKey.template getNeighbors< false , LeftFEMCOverlapRadius , RightFEMCOverlapRadius >( node->parent , pNeighbors );
-
-			bool isInterior = _isInteriorlyOverlapped< FEMDegree , CDegree >( node->parent );
-			int cx , cy , cz;
-			if( d>0 )
-			{
-				int c = int( node - node->parent->children );
-				Cube::FactorCornerIndex( c , cx , cy , cz );
-			}
-			else cx = cy = cz = 0;
-			Stencil< _D , CFEMOverlapSize >& _stencil = stencils[cx][cy][cz];
-
-			Real constraint = Real(0);
-			LocalDepth d ; LocalOffset off;
-			_localDepthAndOffset( node , d , off );
-			for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-			{
-				TreeOctNode* _node = pNeighbors.neighbors[x][y][z];
-				if( isValidFEMNode< CDegree , CBType >( _node ) )
-				{
-					if( isInterior ) constraint += _Dot( _coefficients[ _node->nodeData.nodeIndex ] , (D)_stencil( x , y , z ) );
-					else
-					{
-						LocalDepth _d ; LocalOffset _off;
-						_localDepthAndOffset ( _node , _d , _off );
-						constraint += _Dot( _coefficients[ _node->nodeData.nodeIndex ] , (D)F.template integrate< false >( childIntegrator , _off , off ) );
-					}
-				}
-			}
-			constraints[i] += constraint;
-		}
-	}
-	memoryUsage();
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType , bool HasGradients >
-void Octree< Real >::addInterpolationConstraints( const InterpolationInfo< HasGradients >& interpolationInfo , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-{
-	typedef typename TreeOctNode::NeighborKey< -BSplineSupportSizes< FEMDegree >::SupportStart , BSplineSupportSizes< FEMDegree >::SupportEnd > SupportKey;
-	maxDepth = std::min< LocalDepth >( maxDepth , _maxDepth );
-	{
-		static const int OverlapSize = BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize;
-		static const int LeftSupportRadius  = -BSplineSupportSizes< FEMDegree >::SupportStart;
-		static const int RightSupportRadius =  BSplineSupportSizes< FEMDegree >::SupportEnd;
-		static const int OverlapRadius = - BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapStart;
-		BSplineData< FEMDegree , BType > bsData( _maxDepth );
-		for( int d=0 ; d<=maxDepth ; d++ )
-		{
-			std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( maxDepth ) );
-
-#pragma omp parallel for num_threads( threads )
-			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidFEMNode( _sNodes.treeNodes[i] ) )
-			{
-				TreeOctNode* node = _sNodes.treeNodes[i];
-				SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-				typename TreeOctNode::Neighbors< OverlapSize > neighbors;
-				neighborKey.template getNeighbors< false , OverlapRadius , OverlapRadius >( node , neighbors );
-
-				double constraint = 0;
-				int fIdx[3];
-				functionIndex< FEMDegree , BType >( node , fIdx );
-				// evaluate the current node's basis function at adjacent points
-				for( int x=-LeftSupportRadius ; x<=RightSupportRadius ; x++ ) for( int y=-LeftSupportRadius ; y<=RightSupportRadius ; y++ ) for( int z=-LeftSupportRadius ; z<=RightSupportRadius ; z++ )
-				{
-					const TreeOctNode* _node = neighbors.neighbors[x+OverlapRadius][y+OverlapRadius][z+OverlapRadius];
-					if( _isValidSpaceNode( _node ) && interpolationInfo( _node ) )
-					{
-						const PointData< Real , HasGradients >& pData = *( interpolationInfo( _node ) );
-						constraint += _ConstraintCalculator_< Real , FEMDegree , HasGradients >::_CalculateConstraint_
-							(
-								pData ,
-								bsData. baseBSplines[ fIdx[0] ][x+LeftSupportRadius] ,
-								bsData. baseBSplines[ fIdx[1] ][y+LeftSupportRadius] ,
-								bsData. baseBSplines[ fIdx[2] ][z+LeftSupportRadius] ,
-								bsData.dBaseBSplines[ fIdx[0] ][x+LeftSupportRadius] ,
-								bsData.dBaseBSplines[ fIdx[1] ][y+LeftSupportRadius] ,
-								bsData.dBaseBSplines[ fIdx[2] ][z+LeftSupportRadius] ,
-								interpolationInfo.valueWeight , interpolationInfo.gradientWeight
-							);
-					}
-				}
-				constraints[ node->nodeData.nodeIndex ] += (Real)constraint;
-			}
-		}
-		memoryUsage();
-	}
-}
-template< class Real >
-template< int FEMDegree1 , BoundaryType FEMBType1 , int FEMDegree2 , BoundaryType FEMBType2 , class DotFunctor , bool HasGradients , class Coefficients1 , class Coefficients2 >
-double Octree< Real >::_dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 ) const
-{
-	double dot = 0;
-
-	// Calculate the contribution from @(depth,depth)
-	{
-		typedef typename TreeOctNode::ConstNeighborKey< -BSplineSupportSizes< FEMDegree1 >::SupportStart , BSplineSupportSizes< FEMDegree1 >::SupportEnd > SupportKey;
-		const int      OverlapSize   =  BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapSize;
-		const int  LeftOverlapRadius = -BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapStart;
-		const int RightOverlapRadius =  BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapEnd;
-
-		for( LocalDepth d=0 ; d<=_maxDepth ; d++ )
-		{
-			Stencil< double , OverlapSize > stencil;
-			typename SystemCoefficients< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::Integrator integrator;
-			BSplineIntegrationData< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::SetIntegrator( integrator , d );
-			SystemCoefficients< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::template SetCentralConstraintStencil< false , DotFunctor >( F , integrator , stencil );
-
-			std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
-
-#pragma omp parallel for num_threads( threads ) reduction( + : dot )
-			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
-			{
-				const TreeOctNode* node = _sNodes.treeNodes[i];
-				const Real* _data1;
-				if( isValidFEMNode< FEMDegree1 , FEMBType1 >( node ) && ( _data1=coefficients1(node) ) )
-				{
-					SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-					typename TreeOctNode::ConstNeighbors< OverlapSize > neighbors;
-					neighborKey.template getNeighbors< LeftOverlapRadius , RightOverlapRadius >( node , neighbors );
-					bool isInterior = _isInteriorlyOverlapped< FEMDegree1 , FEMDegree2 >( node );
-
-					LocalDepth d ; LocalOffset off;
-					_localDepthAndOffset( node , d , off );
-
-					for( int x=0 ; x<OverlapSize ; x++ ) for( int y=0 ; y<OverlapSize ; y++ ) for( int z=0 ; z<OverlapSize ; z++ )
-					{
-						const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-						const Real* _data2;
-						if( isValidFEMNode< FEMDegree2 , FEMBType2 >( _node ) && ( _data2=coefficients2( _node ) ) )
-							if( isInterior ) dot += (*_data1) * (*_data2 ) * stencil( x , y , z );
-							else
-							{
-								LocalDepth _d ; LocalOffset _off;
-								_localDepthAndOffset( _node , _d , _off );
-								dot += (*_data1) * (*_data2) * F.template integrate< false >( integrator , off , _off );
-							}
-					}
-				}
-			}
-		}
-	}
-	// Calculate the contribution from @(<depth,depth)
-	{
-		typedef typename TreeOctNode::ConstNeighborKey< -BSplineSupportSizes< FEMDegree1 >::SupportStart , BSplineSupportSizes< FEMDegree1 >::SupportEnd > SupportKey;
-		const int      OverlapSize   =  BSplineOverlapSizes< FEMDegree2 , FEMDegree1 >::OverlapSize;
-		const int  LeftOverlapRadius = -BSplineOverlapSizes< FEMDegree2 , FEMDegree1 >::OverlapStart;
-		const int RightOverlapRadius =  BSplineOverlapSizes< FEMDegree2 , FEMDegree1 >::OverlapEnd;
-
-		DenseNodeData< Real , FEMDegree1 > cumulative1( _sNodesEnd( _maxDepth-1 ) );
-		if( _maxDepth>0 ) memset( &cumulative1[0] , 0 , sizeof(Real) * _sNodesEnd( _maxDepth-1 ) );
-
-		for( LocalDepth d=1 ; d<=_maxDepth ; d++ )
-		{
-			// Update the cumulative coefficients with the coefficients @(depth-1)
-#pragma omp parallel for
-			for( int i=_sNodesBegin(d-1) ; i<_sNodesEnd(d-1) ; i++ )
-			{
-				const Real* _data1 = coefficients1( _sNodes.treeNodes[i] );
-				if( _data1 ) cumulative1[i] += *_data1;
-			}
-
-			Stencil< double , OverlapSize > stencils[2][2][2];
-			typename SystemCoefficients< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::ChildIntegrator childIntegrator;
-			BSplineIntegrationData< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::SetChildIntegrator( childIntegrator , d-1 );
-			SystemCoefficients< FEMDegree1 , FEMBType1 , FEMDegree2 , FEMBType2 >::template SetCentralConstraintStencils< false >( F, childIntegrator , stencils );
-
-			std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) );
-
-#pragma omp parallel for num_threads( threads ) reduction( + : dot )
-			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
-			{
-				const TreeOctNode* node = _sNodes.treeNodes[i];
-				const Real* _data2;
-				if( isValidFEMNode< FEMDegree2 , FEMBType2 >( node ) && ( _data2=coefficients2( node ) ) )
-				{
-					SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-					bool isInterior = _isInteriorlyOverlapped< FEMDegree1 , FEMDegree2 >( node->parent );
-
-					LocalDepth d ; LocalOffset off;
-					_localDepthAndOffset( node , d , off );
-
-					int cx , cy , cz;
-					Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy ,cz );
-					const Stencil< double , OverlapSize >& _stencil = stencils[cx][cy][cz];
-					typename TreeOctNode::ConstNeighbors< OverlapSize > neighbors;
-					neighborKey.template getNeighbors< LeftOverlapRadius , RightOverlapRadius >( node->parent , neighbors );
-
-					int startX , endX , startY , endY , startZ , endZ;
-					_SetParentOverlapBounds< FEMDegree2 , FEMDegree1 >( node , startX , endX , startY , endY , startZ , endZ );
-					for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-					{
-						const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-						const Real* _data1;
-						if( isValidFEMNode< FEMDegree1 , FEMBType1 >( _node ) && ( _data1=cumulative1(_node) ) )
-						{
-							if( isInterior ) dot += (*_data1) * (*_data2) * _stencil( x , y , z );
-							else
-							{
-								LocalDepth _d ; LocalOffset _off;
-								_localDepthAndOffset( _node , _d , _off );
-								dot += (*_data1) * (*_data2) * F.template integrate< false >( childIntegrator , _off , off );
-							}
-						}
-					}
-				}
-			}
-			// Up sample the cumulative coefficients for the next level
-			if( d<_maxDepth ) _upSample< Real , FEMDegree1 , FEMBType1 >( d , cumulative1 );
-		}
-	}
-
-	// Calculate the contribution from @(>depth,depth)
-	{
-		typedef typename TreeOctNode::ConstNeighborKey< -BSplineSupportSizes< FEMDegree2 >::SupportStart , BSplineSupportSizes< FEMDegree2 >::SupportEnd > SupportKey;
-		const int      OverlapSize   =  BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapSize;
-		const int  LeftOverlapRadius = -BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapStart;
-		const int RightOverlapRadius =  BSplineOverlapSizes< FEMDegree1 , FEMDegree2 >::OverlapEnd;
-
-		DenseNodeData< Real , FEMDegree2 > cumulative2( _sNodesEnd( _maxDepth-1 ) );
-		if( _maxDepth>0 ) memset( &cumulative2[0] , 0 , sizeof(Real) * _sNodesEnd( _maxDepth-1 ) );
-
-		for( LocalDepth d=_maxDepth ; d>0 ; d-- )
-		{
-			Stencil< double , OverlapSize > stencils[2][2][2];
-			typename SystemCoefficients< FEMDegree2 , FEMBType2 , FEMDegree1 , FEMBType1 >::ChildIntegrator childIntegrator;
-			BSplineIntegrationData< FEMDegree2 , FEMBType2 , FEMDegree1 , FEMBType1 >::SetChildIntegrator( childIntegrator , d-1 );
-			SystemCoefficients< FEMDegree2 , FEMBType2 , FEMDegree1 , FEMBType1 >::template SetCentralConstraintStencils< true >( F , childIntegrator , stencils );
-
-			std::vector< SupportKey > neighborKeys( std::max< int >( 1 , threads ) );
-			for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d-1 ) );
-
-			// Update the cumulative constraints @(depth-1) from @(depth)
-#pragma omp parallel for num_threads( threads )
-			for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ )
-			{
-				const TreeOctNode* node = _sNodes.treeNodes[i];
-				const Real* _data1;
-				if( isValidFEMNode< FEMDegree1 , FEMBType1 >( node ) && ( _data1=coefficients1( node ) ) )
-				{
-					SupportKey& neighborKey = neighborKeys[ omp_get_thread_num() ];
-					bool isInterior = _isInteriorlyOverlapped< FEMDegree2 , FEMDegree1 >( node->parent );
-
-					LocalDepth d ; LocalOffset off;
-					_localDepthAndOffset( node , d , off );
-
-					int cx , cy , cz;
-					Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy ,cz );
-					const Stencil< double , OverlapSize >& _stencil = stencils[cx][cy][cz];
-					typename TreeOctNode::ConstNeighbors< OverlapSize > neighbors;
-					neighborKey.template getNeighbors< LeftOverlapRadius , RightOverlapRadius >( node->parent , neighbors );
-
-					int startX , endX , startY , endY , startZ , endZ;
-					_SetParentOverlapBounds< FEMDegree1 , FEMDegree2 >( node , startX , endX , startY , endY , startZ , endZ );
-
-					for( int x=startX ; x<endX ; x++ ) for( int y=startY ; y<endY ; y++ ) for( int z=startZ ; z<endZ ; z++ )
-					{
-						const TreeOctNode* _node = neighbors.neighbors[x][y][z];
-						if( isValidFEMNode< FEMDegree2 , FEMBType2 >( _node ) )
-						{
-							Real _dot;
-							if( isInterior ) _dot = (*_data1) * _stencil( x , y , z );
-							else
-							{
-								LocalDepth _d ; LocalOffset _off;
-								_localDepthAndOffset( _node , _d , _off );
-								_dot = (*_data1) * F.template integrate< true >( childIntegrator , _off , off );
-							}
-#pragma omp atomic
-							cumulative2[ _node->nodeData.nodeIndex ] += _dot;
-						}
-					}
-				}
-			}
-			// Update the dot-product using the cumulative constraints @(depth-1)
-#pragma omp parallel for num_threads( threads ) reduction( + : dot )
-			for( int i=_sNodesBegin(d-1) ; i<_sNodesEnd(d-1) ; i++ )
-			{
-				const TreeOctNode* node = _sNodes.treeNodes[i];
-				const Real* _data2;
-				if( isValidFEMNode< FEMDegree2 , FEMBType2 >( node ) && ( _data2=coefficients2( node ) ) ) dot += cumulative2[ node->nodeData.nodeIndex ] * (*_data2);
-			}
-
-			// Down-sample the cumulative constraints from @(depth-1) to @(depth-2) for the next pass
-			if( d-1>0 ) _downSample< Real , FEMDegree2 , FEMBType2 >( d-1 , cumulative2 );
-		}
-	}
-
-	if( iInfo )
-	{
-		MultiThreadedEvaluator< FEMDegree1 , FEMBType1 > mt1( this , coefficients1 , threads );
-		MultiThreadedEvaluator< FEMDegree2 , FEMBType2 > mt2( this , coefficients2 , threads );
-
-#pragma omp parallel for num_threads( threads ) reduction( + : dot )
-		for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth) ; i++ )
-		{
-			if( _isValidSpaceNode( _sNodes.treeNodes[i] ) && !_isValidSpaceNode( _sNodes.treeNodes[i]->children ) && (*iInfo)( _sNodes.treeNodes[i] ) )
-			{
-
-				const PointData< Real , HasGradients >& pData = *( (*iInfo)( _sNodes.treeNodes[i] ) );
-#if POINT_DATA_RES
-				for( int c=0 ; c<PointData< Real , false >::SAMPLES ; c++ ) if( pData[c].weight ) 
-				{
-					Point3D< Real > p = pData[c].position;
-					Real w = pData[c].weight;
-					if( HasGradients )
-					{
-						std::pair< Real , Point3D< Real > > v1 = mt1.valueAndGradient( p , omp_get_thread_num() );
-						std::pair< Real , Point3D< Real > > v2 = mt2.valueAndGradient( p , omp_get_thread_num() );
-						dot += v1.first * v2.first * w * iInfo->valueWeight + Point3D< Real >::Dot( v1.second , v2.second ) * w * iInfo->gradientWeight;
-					}
-					else dot += mt1.value( p , omp_get_thread_num() ) * mt2.value( p , omp_get_thread_num() ) * w * iInfo->valueWeight;
-				}
-#else // !POINT_DATA_RES
-				Point3D< Real > p = pData.position;
-				Real w = pData.weight;
-				if( HasGradients )
-				{
-					std::pair< Real , Point3D< Real > > v1 = mt1.valueAndGradient( p , omp_get_thread_num() );
-					std::pair< Real , Point3D< Real > > v2 = mt2.valueAndGradient( p , omp_get_thread_num() );
-					dot += v1.first * v2.first * w * iInfo->valueWeight + Point3D< Real >::Dot( v1.second , v2.second ) * w * iInfo->gradientWeight;
-				}
-				else dot += mt1.value( p , omp_get_thread_num() ) * mt2.value( p , omp_get_thread_num() ) * w * iInfo->valueWeight;
-#endif // POINT_DATA_RES
-			}
-		}
-	}
-
-	return dot;
-}
diff --git a/Src/MultiGridOctreeData.WeightedSamples.inl b/Src/MultiGridOctreeData.WeightedSamples.inl
deleted file mode 100644
index 279d91b..0000000
--- a/Src/MultiGridOctreeData.WeightedSamples.inl
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-// evaluate the result of splatting along a plane and then evaluating at a point on the plane.
-template< int Degree > double GetScaleValue( void )
-{
-	double centerValues[Degree+1];
-	Polynomial< Degree >::BSplineComponentValues( 0.5 , centerValues );
-	double scaleValue = 0;
-	for( int i=0 ; i<=Degree ; i++ ) scaleValue += centerValues[i] * centerValues[i];
-	return 1./ scaleValue;
-}
-template< class Real >
-template< int WeightDegree >
-void Octree< Real >::_addWeightContribution( DensityEstimator< WeightDegree >& densityWeights , TreeOctNode* node , Point3D< Real > position , PointSupportKey< WeightDegree >& weightKey , Real weight )
-{
-	static const double ScaleValue = GetScaleValue< WeightDegree >();
-	double dx[ DIMENSION ][ PointSupportKey< WeightDegree >::Size ];
-	typename TreeOctNode::Neighbors< PointSupportKey< WeightDegree >::Size >& neighbors = weightKey.template getNeighbors< true >( node , _NodeInitializer );
-	densityWeights.reserve( NodeCount() );
-	Point3D< Real > start;
-	Real w;
-	_startAndWidth( node , start , w );
-	for( int dim=0 ; dim<DIMENSION ; dim++ ) Polynomial< WeightDegree >::BSplineComponentValues( ( position[dim]-start[dim] ) / w , dx[dim] );
-
-	weight *= (Real)ScaleValue;
-
-	for( int i=0 ; i<PointSupportKey< WeightDegree >::Size ; i++ ) for( int j=0 ; j<PointSupportKey< WeightDegree >::Size ; j++ )
-	{
-		double dxdy = dx[0][i] * dx[1][j] * weight;
-		TreeOctNode** _neighbors = neighbors.neighbors[i][j];
-		for( int k=0 ; k<PointSupportKey< WeightDegree >::Size ; k++ ) if( _neighbors[k] ) densityWeights[ _neighbors[k] ] += Real( dxdy * dx[2][k] );
-	}
-}
-
-template< class Real >
-template< int WeightDegree , class PointSupportKey >
-Real Octree< Real >::_getSamplesPerNode( const DensityEstimator< WeightDegree >& densityWeights , const TreeOctNode* node , Point3D< Real > position , PointSupportKey& weightKey ) const
-{
-	Real weight = 0;
-	double dx[ DIMENSION ][ PointSupportKey::Size ];
-	const typename PointSupportKey::template Neighbors< PointSupportKey::Size >& neighbors = weightKey.getNeighbors( node );
-
-	Point3D< Real > start;
-	Real w;
-	_startAndWidth( node , start , w );
-
-	for( int dim=0 ; dim<DIMENSION ; dim++ ) Polynomial< WeightDegree >::BSplineComponentValues( ( position[dim]-start[dim] ) / w , dx[dim] );
-
-	for( int i=0 ; i<PointSupportKey::Size ; i++ ) for( int j=0 ; j<PointSupportKey::Size ; j++ )
-	{
-		double dxdy = dx[0][i] * dx[1][j];
-		for( int k=0 ; k<PointSupportKey::Size ; k++ ) if( neighbors.neighbors[i][j][k] )
-		{
-			const Real* w = densityWeights( neighbors.neighbors[i][j][k] );
-			if( w ) weight += Real( dxdy * dx[2][k] * (*w) );
-		}
-	}
-	return weight;
-}
-template< class Real >
-template< int WeightDegree , class PointSupportKey >
-void Octree< Real >::_getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , const TreeOctNode* node , Point3D< Real > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const
-{
-	const TreeOctNode* temp = node;
-	while( _localDepth( temp )>densityWeights.kernelDepth() ) temp = temp->parent;
-	weight = _getSamplesPerNode( densityWeights , temp , position , weightKey );
-	if( weight>=(Real)1. ) depth = Real( _localDepth( temp ) + log( weight ) / log(double(1<<(DIMENSION-1))) );
-	else
-	{
-		Real oldWeight , newWeight;
-		oldWeight = newWeight = weight;
-		while( newWeight<(Real)1. && temp->parent )
-		{
-			temp=temp->parent;
-			oldWeight = newWeight;
-			newWeight = _getSamplesPerNode( densityWeights , temp , position , weightKey );
-		}
-		depth = Real( _localDepth( temp ) + log( newWeight ) / log( newWeight / oldWeight ) );
-	}
-	weight = Real( pow( double(1<<(DIMENSION-1)) , -double(depth) ) );
-}
-template< class Real >
-template< int WeightDegree , class PointSupportKey >
-void Octree< Real >::_getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , Point3D< Real > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const
-{
-	TreeOctNode* temp;
-	Point3D< Real > myCenter( (Real)0.5 , (Real)0.5 , (Real)0.5 );
-	Real myWidth = Real( 1. );
-
-	// Get the finest node with depth less than or equal to the splat depth that contains the point
-	temp = _spaceRoot;
-	while( _localDepth( temp )<densityWeights.kernelDepth() )
-	{
-		if( !IsActiveNode( temp->children ) ) break;// fprintf( stderr , "[ERROR] Octree::GetSampleDepthAndWeight\n" ) , exit( 0 );
-		int cIndex = TreeOctNode::CornerIndex( myCenter , position );
-		temp = temp->children + cIndex;
-		myWidth /= 2;
-		if( cIndex&1 ) myCenter[0] += myWidth/2;
-		else		   myCenter[0] -= myWidth/2;
-		if( cIndex&2 ) myCenter[1] += myWidth/2;
-		else		   myCenter[1] -= myWidth/2;
-		if( cIndex&4 ) myCenter[2] += myWidth/2;
-		else		   myCenter[2] -= myWidth/2;
-	}
-	return _getSampleDepthAndWeight( densityWeights , temp , position , weightKey , depth , weight );
-}
-
-template< class Real >
-template< bool CreateNodes , int DataDegree , class V >
-void Octree< Real >::_splatPointData( TreeOctNode* node , Point3D< Real > position , V v , SparseNodeData< V , DataDegree >& dataInfo , PointSupportKey< DataDegree >& dataKey )
-{
-	double dx[ DIMENSION ][ PointSupportKey< DataDegree >::Size ];
-	typename TreeOctNode::Neighbors< PointSupportKey< DataDegree >::Size >& neighbors = dataKey.template getNeighbors< CreateNodes >( node , _NodeInitializer );
-	Point3D< Real > start;
-	Real w;
-	_startAndWidth( node , start , w );
-
-	for( int dd=0 ; dd<DIMENSION ; dd++ ) Polynomial< DataDegree >::BSplineComponentValues( ( position[dd]-start[dd] ) / w , dx[dd] );
-
-	for( int i=0 ; i<PointSupportKey< DataDegree >::Size ; i++ ) for( int j=0 ; j<PointSupportKey< DataDegree >::Size ; j++ )
-	{
-		double dxdy = dx[0][i] * dx[1][j];
-		for( int k=0 ; k<PointSupportKey< DataDegree >::Size ; k++ )
-			if( IsActiveNode( neighbors.neighbors[i][j][k] ) )
-			{
-				TreeOctNode* _node = neighbors.neighbors[i][j][k];
-
-				double dxdydz = dxdy * dx[2][k];
-				dataInfo[ _node ] += v * (Real)dxdydz;
-			}
-	}
-}
-template< class Real >
-template< bool CreateNodes , int WeightDegree , int DataDegree , class V >
-Real Octree< Real >::_splatPointData( const DensityEstimator< WeightDegree >& densityWeights , Point3D< Real > position , V v , SparseNodeData< V , DataDegree >& dataInfo , PointSupportKey< WeightDegree >& weightKey , PointSupportKey< DataDegree >& dataKey , LocalDepth minDepth , LocalDepth maxDepth , int dim )
-{
-	double dx;
-	V _v;
-	TreeOctNode* temp;
-	int cnt=0;
-	double width;
-	Point3D< Real > myCenter( (Real)0.5 , (Real)0.5 , (Real)0.5 );
-	Real myWidth = (Real)1.;
-
-	temp = _spaceRoot;
-	while( _localDepth( temp )<densityWeights.kernelDepth() )
-	{
-		if( !IsActiveNode( temp->children ) ) break;
-		int cIndex = TreeOctNode::CornerIndex( myCenter , position );
-		temp = temp->children + cIndex;
-		myWidth /= 2;
-		if( cIndex&1 ) myCenter[0] += myWidth/2;
-		else		   myCenter[0] -= myWidth/2;
-		if( cIndex&2 ) myCenter[1] += myWidth/2;
-		else 	  	   myCenter[1] -= myWidth/2;
-		if( cIndex&4 ) myCenter[2] += myWidth/2;
-		else 		   myCenter[2] -= myWidth/2;
-	}
-	Real weight , depth;
-	_getSampleDepthAndWeight( densityWeights , temp , position , weightKey , depth , weight );
-
-	if( depth<minDepth ) depth = Real(minDepth);
-	if( depth>maxDepth ) depth = Real(maxDepth);
-	int topDepth = int(ceil(depth));
-
-	dx = 1.0-(topDepth-depth);
-	if     ( topDepth<=minDepth ) topDepth = minDepth , dx = 1;
-	else if( topDepth> maxDepth ) topDepth = maxDepth , dx = 1;
-
-	while( _localDepth( temp )>topDepth ) temp=temp->parent;
-	while( _localDepth( temp )<topDepth )
-	{
-		if( !temp->children ) temp->initChildren( _NodeInitializer );
-		int cIndex = TreeOctNode::CornerIndex( myCenter , position );
-		temp = &temp->children[cIndex];
-		myWidth/=2;
-		if( cIndex&1 ) myCenter[0] += myWidth/2;
-		else		   myCenter[0] -= myWidth/2;
-		if( cIndex&2 ) myCenter[1] += myWidth/2;
-		else		   myCenter[1] -= myWidth/2;
-		if( cIndex&4 ) myCenter[2] += myWidth/2;
-		else		   myCenter[2] -= myWidth/2;
-	}
-	width = 1.0 / ( 1<<_localDepth( temp ) );
-	_v = v * weight / Real( pow( width , dim ) ) * Real( dx );
-	_splatPointData< CreateNodes >( temp , position , _v , dataInfo , dataKey );
-	if( fabs(1.0-dx) > EPSILON )
-	{
-		dx = Real(1.0-dx);
-		temp = temp->parent;
-		width = 1.0 / ( 1<<_localDepth( temp ) );
-
-		_v = v * weight / Real( pow( width , dim ) ) * Real( dx );
-		_splatPointData< CreateNodes >( temp , position , _v , dataInfo , dataKey );
-	}
-	return weight;
-}
-template< class Real >
-template< bool CreateNodes , int WeightDegree , int DataDegree , class V >
-Real Octree< Real >::_multiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , TreeOctNode* node , Point3D< Real > position , V v , SparseNodeData< V , DataDegree >& dataInfo , PointSupportKey< WeightDegree >& weightKey , PointSupportKey< DataDegree >& dataKey , int dim )
-{
-	Real _depth , weight;
-	if( densityWeights ) _getSampleDepthAndWeight( *densityWeights , position , weightKey , _depth , weight );
-	else weight = (Real)1.;
-	V _v = v * weight;
-
-	double dx[ DIMENSION ][ PointSupportKey< DataDegree >::Size ];
-	dataKey.template getNeighbors< CreateNodes >( node , _NodeInitializer );
-
-	for( TreeOctNode* _node=node ; _localDepth( _node )>=0 ; _node=_node->parent )
-	{
-		V __v = _v * (Real)pow( 1<<_localDepth( _node ) , dim );
-		Point3D< Real > start;
-		Real w;
-		_startAndWidth( _node , start , w );
-		for( int dd=0 ; dd<DIMENSION ; dd++ ) Polynomial< DataDegree >::BSplineComponentValues( ( position[dd]-start[dd] ) / w , dx[dd] );
-		typename TreeOctNode::Neighbors< PointSupportKey< DataDegree >::Size >& neighbors = dataKey.neighbors[ _localToGlobal( _localDepth( _node ) ) ];
-		for( int i=0 ; i<PointSupportKey< DataDegree >::Size ; i++ ) for( int j=0 ; j<PointSupportKey< DataDegree >::Size ; j++ )
-		{
-			double dxdy = dx[0][i] * dx[1][j];
-			for( int k=0 ; k<PointSupportKey< DataDegree >::Size ; k++ )
-				if( IsActiveNode( neighbors.neighbors[i][j][k] ) )
-				{
-					TreeOctNode* _node = neighbors.neighbors[i][j][k];
-					double dxdydz = dxdy * dx[2][k];
-					dataInfo[ _node ] += __v * (Real)dxdydz;
-				}
-		}
-	}
-	return weight;
-}
-
-template< class Real >
-template< class V , int DataDegree , BoundaryType BType , class Coefficients >
-V Octree< Real >::_evaluate( const Coefficients& coefficients , Point3D< Real > p , const BSplineData< DataDegree , BType >& bsData , const ConstPointSupportKey< DataDegree >& dataKey ) const
-{
-	V value = V(0);
-
-	for( int d=_localToGlobal( 0 ) ; d<=dataKey.depth() ; d++ )
-	{
-		double dx[ DIMENSION ][ PointSupportKey< DataDegree >::Size ];
-		memset( dx , 0 , sizeof( double ) * DIMENSION * PointSupportKey< DataDegree >::Size );
-		{
-			const TreeOctNode* n = dataKey.neighbors[d].neighbors[ PointSupportKey< DataDegree >::LeftRadius ][ PointSupportKey< DataDegree >::LeftRadius ][ PointSupportKey< DataDegree >::LeftRadius ];
-			if( !n ) fprintf( stderr , "[ERROR] Point is not centered on a node\n" ) , exit( 0 );
-			int fIdx[3];
-			functionIndex< DataDegree , BType >( n , fIdx );
-			int fStart , fEnd;
-			BSplineData< DataDegree , BType >::FunctionSpan( _localDepth( n ) , fStart , fEnd );
-			for( int dd=0 ; dd<DIMENSION ; dd++ ) for( int i=-PointSupportKey< DataDegree >::LeftRadius ; i<=PointSupportKey< DataDegree >::RightRadius ; i++ )
-				if( fIdx[dd]+i>=fStart && fIdx[dd]+i<fEnd ) dx[dd][i] = bsData.baseBSplines[ fIdx[dd]+i ][ -i+PointSupportKey< DataDegree >::RightRadius ]( p[dd] );
-		}
-		for( int i=0 ; i<PointSupportKey< DataDegree >::Size ; i++ ) for( int j=0 ; j<PointSupportKey< DataDegree >::Size ; j++ ) for( int k=0 ; k<PointSupportKey< DataDegree >::Size ; k++ )
-		{
-			const TreeOctNode* n = dataKey.neighbors[d].neighbors[i][j][k];
-			if( isValidFEMNode< DataDegree , BType >( n ) )
-			{
-				const V* v = coefficients( n );
-				if( v ) value += (*v) * (Real) ( dx[0][i] * dx[1][j] * dx[2][k] );
-			}
-		}
-	}
-
-	return value;
-}
-
-template< class Real >
-template< class V , int DataDegree , BoundaryType BType >
-Pointer( V ) Octree< Real >::voxelEvaluate( const DenseNodeData< V , DataDegree >& coefficients , int& res , Real isoValue , LocalDepth depth , bool primal )
-{
-	int begin , end , dim;
-	if( depth<=0 || depth>_maxDepth ) depth = _maxDepth;
-
-	// Initialize the coefficients at the coarsest level
-	Pointer( V ) _coefficients = NullPointer( V );
-	{
-		LocalDepth d = 0;
-		begin = _BSplineBegin< DataDegree , BType >( d ) , end = _BSplineEnd< DataDegree , BType >( d ) , dim = end - begin;
-		_coefficients = NewPointer< V >( dim * dim * dim );
-		memset( _coefficients , 0 , sizeof( V ) * dim  * dim * dim );
-#pragma omp parallel for num_threads( threads )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( !_outOfBounds< DataDegree , BType >( _sNodes.treeNodes[i] ) )
-		{
-			LocalDepth _d ; LocalOffset _off;
-			_localDepthAndOffset( _sNodes.treeNodes[i] , _d , _off );
-			_off[0] -= begin , _off[1] -= begin , _off[2] -= begin;
-			_coefficients[ _off[0] + _off[1]*dim + _off[2]*dim*dim ] = coefficients[i];
-		}
-	}
-
-	// Up-sample and add in the existing coefficients
-	for( LocalDepth d=1 ; d<=depth ; d++ )
-	{
-		begin = _BSplineBegin< DataDegree , BType >( d ) , end = _BSplineEnd< DataDegree , BType >( d ) , dim = end - begin;
-		Pointer( V ) __coefficients = NewPointer< V >( dim * dim *dim );
-		memset( __coefficients , 0 , sizeof( V ) * dim  * dim * dim );
-#pragma omp parallel for num_threads( threads )
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( !_outOfBounds< DataDegree , BType >( _sNodes.treeNodes[i] ) )
-		{
-			LocalDepth _d ; LocalOffset _off;
-			_localDepthAndOffset( _sNodes.treeNodes[i] , _d , _off );
-			_off[0] -= begin , _off[1] -= begin , _off[2] -= begin;
-			__coefficients[ _off[0] + _off[1]*dim + _off[2]*dim*dim ] = coefficients[i];
-		}
-		_UpSample< V , DataDegree , BType >( d , ( ConstPointer(V) )_coefficients , __coefficients , threads );
-		DeletePointer( _coefficients );
-		_coefficients = __coefficients;
-	}
-
-	res = 1<<depth;
-	if( primal ) res++;
-	Pointer( V ) values = NewPointer< V >( res*res*res );
-	memset( values , 0 , sizeof(V)*res*res*res );
-
-	if( primal )
-	{
-		// evaluate at the cell corners
-		typename BSplineEvaluationData< DataDegree , BType >::CornerEvaluator::Evaluator evaluator;
-		BSplineEvaluationData< DataDegree , BType >::SetCornerEvaluator( evaluator , depth );
-#pragma omp parallel for num_threads( threads )
-		for( int k=0 ; k<res ; k++ ) for( int j=0 ; j<res ; j++ ) for( int i=0 ; i<res ; i++ )
-		{
-			V value = values[ i + j*res + k*res*res ];
-			for( int kk=-BSplineSupportSizes< DataDegree >::CornerEnd ; kk<=-BSplineSupportSizes< DataDegree >::CornerStart ; kk++ ) if( k+kk>=begin && k+kk<end )
-				for( int jj=-BSplineSupportSizes< DataDegree >::CornerEnd ; jj<=-BSplineSupportSizes< DataDegree >::CornerStart ; jj++ ) if( j+jj>=begin && j+jj<end )
-				{
-					double weight = evaluator.value( k+kk , k , false ) * evaluator.value( j+jj , j , false );
-					int idx = (j+jj-begin)*dim + (k+kk-begin)*dim*dim;
-					for( int ii=-BSplineSupportSizes< DataDegree >::CornerEnd ; ii<=-BSplineSupportSizes< DataDegree >::CornerStart ; ii++ ) if( i+ii>=begin && i+ii<end )
-						value += _coefficients[ i + ii - begin + idx ] * Real( weight * evaluator.value( i + ii , i , false ) );
-				}
-			values[ i + j*res + k*res*res ] = value;
-		}
-	}
-	else
-	{
-		// evaluate at the cell centers
-		typename BSplineEvaluationData< DataDegree , BType >::CenterEvaluator::Evaluator evaluator;
-		BSplineEvaluationData< DataDegree , BType >::SetCenterEvaluator( evaluator , depth );
-#pragma omp parallel for num_threads( threads )
-		for( int k=0 ; k<res ; k++ ) for( int j=0 ; j<res ; j++ ) for( int i=0 ; i<res ; i++ )
-		{
-			V& value = values[ i + j*res + k*res*res ];
-			for( int kk=-BSplineSupportSizes< DataDegree >::SupportEnd ; kk<=-BSplineSupportSizes< DataDegree >::SupportStart ; kk++ ) if( k+kk>=begin && k+kk<end )
-				for( int jj=-BSplineSupportSizes< DataDegree >::SupportEnd ; jj<=-BSplineSupportSizes< DataDegree >::SupportStart ; jj++ ) if( j+jj>=begin && j+jj<end )
-				{
-					double weight = evaluator.value( k+kk , k , false ) * evaluator.value( j+jj , j , false );
-					int idx = (j+jj-begin)*dim + (k+kk-begin)*dim*dim;
-					for( int ii=-BSplineSupportSizes< DataDegree >::SupportEnd ; ii<=-BSplineSupportSizes< DataDegree >::SupportStart ; ii++ ) if( i+ii>=begin && i+ii<end )
-						value += _coefficients[ i + ii - begin + idx ] * Real( weight * evaluator.value( i+ii , i , false ) );
-				}
-		}
-	}
-	memoryUsage();
-	DeletePointer( _coefficients );
-	for( int i=0 ; i<res*res*res ; i++ ) values[i] -= isoValue;
-
-	return values;
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-SparseNodeData< Real , 0 > Octree< Real >::leafValues( const DenseNodeData< Real , FEMDegree >& coefficients ) const
-{
-	SparseNodeData< Real , 0 > values;
-	DenseNodeData< Real , FEMDegree > _coefficients( _sNodesEnd(_maxDepth-1) );
-	memset( &_coefficients[0] , 0 , sizeof(Real)*_sNodesEnd(_maxDepth-1) );
-	for( int i=_sNodes.begin( _localToGlobal( 0 ) ) ; i<_sNodesEnd(_maxDepth-1) ; i++ ) _coefficients[i] = coefficients[i];
-	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample( d , _coefficients );
-	for( LocalDepth d=_maxDepth ; d>=0 ; d-- )
-	{
-		_Evaluator< FEMDegree , BType > evaluator;
-		evaluator.set( d );
-		std::vector< ConstPointSupportKey< FEMDegree > > neighborKeys( std::max< int >( 1 , threads ) );
-		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-		{
-			ConstPointSupportKey< FEMDegree >& neighborKey = neighborKeys[ omp_get_thread_num() ];
-			TreeOctNode* node = _sNodes.treeNodes[i];
-			if( !IsActiveNode( node->children ) )
-			{
-				neighborKey.getNeighbors( node );
-				bool isInterior = _IsInteriorlySupported< FEMDegree >( node->parent );
-				values[ node ] = _getCenterValue( neighborKey , node , coefficients , _coefficients , evaluator , isInterior );
-			}
-		}
-	}
-	return values;
-}
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-SparseNodeData< Point3D< Real > , 0 > Octree< Real >::leafGradients( const DenseNodeData< Real , FEMDegree >& coefficients ) const
-{
-	SparseNodeData< Point3D< Real > , 0 > gradients;
-	DenseNodeData< Real , FEMDegree > _coefficients( _sNodesEnd(_maxDepth-1 ) );
-	memset( &_coefficients[0] , 0 , sizeof(Real)*_sNodesEnd(_maxDepth-1) );
-	for( int i=_sNodesBegin(0) ; i<_sNodesEnd(_maxDepth-1) ; i++ ) _coefficients[i] = coefficients[i];
-	for( LocalDepth d=1 ; d<_maxDepth ; d++ ) _upSample( d , _coefficients );
-	for( LocalDepth d=_maxDepth ; d>=0 ; d-- )
-	{
-		_Evaluator< FEMDegree , BType > evaluator;
-		evaluator.set( d );
-		std::vector< ConstPointSupportKey< FEMDegree > > neighborKeys( std::max< int >( 1 , threads ) );
-		for( size_t i=0 ; i<neighborKeys.size() ; i++ ) neighborKeys[i].set( _localToGlobal( d ) );
-		for( int i=_sNodesBegin(d) ; i<_sNodesEnd(d) ; i++ ) if( _isValidSpaceNode( _sNodes.treeNodes[i] ) )
-		{
-			ConstPointSupportKey< FEMDegree >& neighborKey = neighborKeys[ omp_get_thread_num() ];
-			TreeOctNode* node = _sNodes.treeNodes[i];
-			if( !IsActiveNode( node->children ) )
-			{
-				neighborKey.getNeighbors( node );
-				bool isInterior = _IsInteriorlySupported< FEMDegree >( node->parent );
-				gradients[ node ] = _getCenterValueAndGradient( neighborKey , node , coefficients , _coefficients , evaluator , isInterior ).second;
-			}
-		}
-	}
-	return gradients;
-}
diff --git a/Src/MultiGridOctreeData.h b/Src/MultiGridOctreeData.h
deleted file mode 100644
index 595ebdc..0000000
--- a/Src/MultiGridOctreeData.h
+++ /dev/null
@@ -1,988 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-// [COMMENTS]
-// -- Throughout the code, should make a distinction between indices and offsets
-// -- Make an instance of _evaluate that samples the finite-elements correctly (specifically, to handle the boundaries)
-// -- Make functions like depthAndOffset parity dependent (ideally all "depth"s should be relative to the B-Slpline resolution
-// -- Make all points relative to the unit-cube, regardless of degree parity
-// -- It's possible that for odd degrees, the iso-surfacing will fail because the leaves in the SortedTreeNodes do not form a partition of space
-// -- [MAYBE] Treat normal field as a sum of delta functions, rather than a smoothed signal (again, so that high degrees aren't forced to generate smooth reconstructions)
-// -- [MAYBE] Make the degree of the B-Spline with which the normals are splatted independent of the degree of the FEM system. (This way, higher degree systems aren't forced to generate smoother normal fields.)
-// -- [MAYBE] Remove the isValidFEM/isValidSpace functions since the octree supports all degrees/boundary types (up to the max degree for which finalizedBrooded... was called)
-
-// [TODO]
-// -- Currently, the implementation assumes that the boundary constraints are the same for vector fields and scalar fields
-// -- Modify the setting of the flags so that only the subset of the broods that are needed 
-
-#ifndef MULTI_GRID_OCTREE_DATA_INCLUDED
-#define MULTI_GRID_OCTREE_DATA_INCLUDED
-
-#define NEW_CODE
-#define FAST_SET_UP				// If enabled, kernel density estimation is done aglomeratively
-
-#define POINT_DATA_RES 0		// Specifies the resolution of the subgrid storing points with each voxel (0==1 but is faster)
-
-#define DATA_DEGREE 1			// The order of the B-Spline used to splat in data for color interpolation
-#define WEIGHT_DEGREE 2			// The order of the B-Spline used to splat in the weights for density estimation
-#define NORMAL_DEGREE 2			// The order of the B-Spline used to splat int the normals for constructing the Laplacian constraints
-//#define MAX_MEMORY_GB 15		// The maximum memory the application is allowed to use
-#define MAX_MEMORY_GB 0
-
-#include <unordered_map>
-#include <omp.h>
-#include "BSplineData.h"
-#include "PointStream.h"
-#include "Geometry.h"
-#include "Octree.h"
-#include "SparseMatrix.h"
-
-#ifndef _OPENMP
-int omp_get_num_procs( void ){ return 1; }
-int omp_get_thread_num( void ){ return 0; }
-#endif // _OPENMP
-
-#define DERIVATIVES( Degree ) ( ( Degree>1 ) ? 2 : ( Degree==1 ? 1 : 0 ) )
-
-class TreeNodeData
-{
-public:
-	enum
-	{
-		SPACE_FLAG = 1 ,
-		FEM_FLAG = 2 ,
-		GHOST_FLAG = 1<<7
-	};
-	int nodeIndex;
-	char flags;
-
-	void setGhostFlag( bool f ){ if( f ) flags |= GHOST_FLAG ; else flags &= ~GHOST_FLAG; }
-	bool getGhostFlag( void ) const { return ( flags & GHOST_FLAG )!=0; }
-	TreeNodeData( void );
-	~TreeNodeData( void );
-};
-
-class VertexData
-{
-	typedef OctNode< TreeNodeData > TreeOctNode;
-public:
-	static const int VERTEX_COORDINATE_SHIFT = ( sizeof( long long ) * 8 ) / 3;
-	static long long   EdgeIndex( const TreeOctNode* node , int eIndex , int maxDepth , int index[DIMENSION] );
-	static long long   EdgeIndex( const TreeOctNode* node , int eIndex , int maxDepth );
-	static long long   FaceIndex( const TreeOctNode* node , int fIndex , int maxDepth,int index[DIMENSION] );
-	static long long   FaceIndex( const TreeOctNode* node , int fIndex , int maxDepth );
-	static long long CornerIndex( const TreeOctNode* node , int cIndex , int maxDepth , int index[DIMENSION] );
-	static long long CornerIndex( const TreeOctNode* node , int cIndex , int maxDepth );
-	static long long CenterIndex( const TreeOctNode* node , int maxDepth , int index[DIMENSION] );
-	static long long CenterIndex( const TreeOctNode* node , int maxDepth );
-	static long long CornerIndex( int depth , const int offSet[DIMENSION] , int cIndex , int maxDepth , int index[DIMENSION] );
-	static long long CenterIndex( int depth , const int offSet[DIMENSION] , int maxDepth , int index[DIMENSION] );
-	static long long CornerIndexKey( const int index[DIMENSION] );
-};
-
-// This class stores the octree nodes, sorted by depth and then by z-slice.
-// To support primal representations, the initializer takes a function that
-// determines if a node should be included/indexed in the sorted list.
-// [NOTE] Indexing of nodes is _GLOBAL_
-class SortedTreeNodes
-{
-	typedef OctNode< TreeNodeData > TreeOctNode;
-protected:
-	Pointer( Pointer( int ) ) _sliceStart;
-	int _levels;
-public:
-	Pointer( TreeOctNode* ) treeNodes;
-	int begin( int depth ) const{ return _sliceStart[depth][0]; }
-	int   end( int depth ) const{ return _sliceStart[depth][(size_t)1<<depth]; }
-	int begin( int depth , int slice ) const{ return _sliceStart[depth][slice  ]  ; }
-	int   end( int depth , int slice ) const{ if(depth<0||depth>=_levels||slice<0||slice>=(1<<depth)) printf( "uh oh\n" ) ; return _sliceStart[depth][slice+1]; }
-	int size( void ) const { return _sliceStart[_levels-1][(size_t)1<<(_levels-1)]; }
-	int size( int depth ) const { if(depth<0||depth>=_levels) printf( "uhoh\n" ); return _sliceStart[depth][(size_t)1<<depth] - _sliceStart[depth][0]; }
-	int size( int depth , int slice ) const { return _sliceStart[depth][slice+1] - _sliceStart[depth][slice]; }
-	int levels( void ) const { return _levels; }
-
-	SortedTreeNodes( void );
-	~SortedTreeNodes( void );
-	void set( TreeOctNode& root , std::vector< int >* map );
-	void set( TreeOctNode& root );
-
-	template< int Indices >
-	struct  _Indices
-	{
-		int idx[Indices];
-		_Indices( void ){ memset( idx , -1 , sizeof( int ) * Indices ); }
-		int& operator[] ( int i ) { return idx[i]; }
-		const int& operator[] ( int i ) const { return idx[i]; }
-	};
-	typedef _Indices< Square::CORNERS > SquareCornerIndices;
-	typedef _Indices< Square::EDGES > SquareEdgeIndices;
-	typedef _Indices< Square::FACES > SquareFaceIndices;
-
-	struct SliceTableData
-	{
-		Pointer( SquareCornerIndices ) cTable;
-		Pointer( SquareEdgeIndices   ) eTable;
-		Pointer( SquareFaceIndices   ) fTable;
-		int cCount , eCount , fCount , nodeOffset , nodeCount;
-		SliceTableData( void ){ fCount = eCount = cCount = 0 , cTable = NullPointer( SquareCornerIndices ) , eTable = NullPointer( SquareEdgeIndices ) , fTable = NullPointer( SquareFaceIndices ) , _cMap = _eMap = _fMap = NullPointer( int ); }
-		~SliceTableData( void ){ clear(); }
-#ifdef BRUNO_LEVY_FIX
-		void clear( void ){ DeletePointer( cTable ) ; DeletePointer( eTable ) ; DeletePointer( fTable ) ; DeletePointer( _cMap ) ; DeletePointer( _eMap ) ; DeletePointer( _fMap ) ; fCount = eCount = cCount = 0; }
-#else // !BRUNO_LEVY_FIX
-		void clear( void ){ DeletePointer( cTable ) ; DeletePointer( eTable ) ; DeletePointer( fTable ) ; fCount = eCount = cCount = 0; }
-#endif // BRUNO_LEVY_FIX
-		SquareCornerIndices& cornerIndices( const TreeOctNode* node );
-		SquareCornerIndices& cornerIndices( int idx );
-		const SquareCornerIndices& cornerIndices( const TreeOctNode* node ) const;
-		const SquareCornerIndices& cornerIndices( int idx ) const;
-		SquareEdgeIndices& edgeIndices( const TreeOctNode* node );
-		SquareEdgeIndices& edgeIndices( int idx );
-		const SquareEdgeIndices& edgeIndices( const TreeOctNode* node ) const;
-		const SquareEdgeIndices& edgeIndices( int idx ) const;
-		SquareFaceIndices& faceIndices( const TreeOctNode* node );
-		SquareFaceIndices& faceIndices( int idx );
-		const SquareFaceIndices& faceIndices( const TreeOctNode* node ) const;
-		const SquareFaceIndices& faceIndices( int idx ) const;
-	protected:
-		Pointer( int ) _cMap;
-		Pointer( int ) _eMap;
-		Pointer( int ) _fMap;
-		friend class SortedTreeNodes;
-	};
-	struct XSliceTableData
-	{
-		Pointer( SquareCornerIndices ) eTable;
-		Pointer( SquareEdgeIndices ) fTable;
-		int fCount , eCount , nodeOffset , nodeCount;
-		XSliceTableData( void ){ fCount = eCount = 0 , eTable = NullPointer( SquareCornerIndices ) , fTable = NullPointer( SquareEdgeIndices ) , _eMap = _fMap = NullPointer( int ); }
-		~XSliceTableData( void ){ clear(); }
-#ifdef BRUNO_LEVY_FIX
-		void clear( void ) { DeletePointer( fTable ) ; DeletePointer( eTable ) ; DeletePointer( _eMap ) ; DeletePointer( _fMap ) ; fCount = eCount = 0; }
-#else // !BRUNO_LEVY_FIX
-		void clear( void ) { DeletePointer( fTable ) ; DeletePointer( eTable ) ; fCount = eCount = 0; }
-#endif // BRUNO_LEVY_FIX
-		SquareCornerIndices& edgeIndices( const TreeOctNode* node );
-		SquareCornerIndices& edgeIndices( int idx );
-		const SquareCornerIndices& edgeIndices( const TreeOctNode* node ) const;
-		const SquareCornerIndices& edgeIndices( int idx ) const;
-		SquareEdgeIndices& faceIndices( const TreeOctNode* node );
-		SquareEdgeIndices& faceIndices( int idx );
-		const SquareEdgeIndices& faceIndices( const TreeOctNode* node ) const;
-		const SquareEdgeIndices& faceIndices( int idx ) const;
-	protected:
-		Pointer( int ) _eMap;
-		Pointer( int ) _fMap;
-		friend class SortedTreeNodes;
-	};
-	void setSliceTableData (  SliceTableData& sData , int depth , int offset , int threads ) const;
-	void setXSliceTableData( XSliceTableData& sData , int depth , int offset , int threads ) const;
-};
-
-template< int Degree >
-struct PointSupportKey : public OctNode< TreeNodeData >::NeighborKey< BSplineSupportSizes< Degree >::SupportEnd , -BSplineSupportSizes< Degree >::SupportStart >
-{
-	static const int LeftRadius  =  BSplineSupportSizes< Degree >::SupportEnd;
-	static const int RightRadius = -BSplineSupportSizes< Degree >::SupportStart;
-	static const int Size = LeftRadius + RightRadius + 1;
-};
-template< int Degree >
-struct ConstPointSupportKey : public OctNode< TreeNodeData >::ConstNeighborKey< BSplineSupportSizes< Degree >::SupportEnd , -BSplineSupportSizes< Degree >::SupportStart >
-{
-	static const int LeftRadius  =  BSplineSupportSizes< Degree >::SupportEnd;
-	static const int RightRadius = -BSplineSupportSizes< Degree >::SupportStart;
-	static const int Size = LeftRadius + RightRadius + 1;
-};
-
-template< class Real , bool HasGradients >
-struct SinglePointData
-{
-	Point3D< Real > position;
-	Real weight;
-	Real value , _value;
-	SinglePointData  operator +  ( const SinglePointData& p ) const { return SinglePointData( position + p.position , value + p.value , weight + p.weight ); }
-	SinglePointData& operator += ( const SinglePointData& p ){ position += p.position ; weight += p.weight , value += p.value ; return *this; }
-	SinglePointData  operator *  ( Real s ) const { return SinglePointData( position*s , weight*s , value*s ); }
-	SinglePointData& operator *= ( Real s ){ position *= s , weight *= s , value *= s ; return *this; }
-	SinglePointData  operator /  ( Real s ) const { return SinglePointData( position/s , weight/s , value/s ); }
-	SinglePointData& operator /= ( Real s ){ position /= s , weight /= s , value /= s ; return *this; }
-	SinglePointData( void ) : position( Point3D< Real >() ) , weight(0) , value(0) , _value(0) { ; }
-	SinglePointData( Point3D< Real > p , Real v , Real w ) { position = p , value = v , weight = w , _value = (Real)0; }
-};
-template< class Real >
-struct SinglePointData< Real , true > : public SinglePointData< Real , false >
-{
-	using SinglePointData< Real , false >::position;
-	using SinglePointData< Real , false >::weight;
-	using SinglePointData< Real , false >::value;
-	using SinglePointData< Real , false >::_value;
-	Point3D< Real > gradient , _gradient;
-	SinglePointData  operator +  ( const SinglePointData& p ) const { return SinglePointData( position + p.position , weight + p.weight , value + p.value , gradient + p.gradient ); }
-	SinglePointData& operator += ( const SinglePointData& p ){ position += p.position , weight += p.weight , value += p.value , gradient += p.gradient ; return *this; }
-	SinglePointData  operator *  ( Real s ) const { return SinglePointData( position*s , weight*s , value*s , gradient*s ); }
-	SinglePointData& operator *= ( Real s ){ position *= s , weight *= s , value *= s , gradient *= s ; return *this; }
-	SinglePointData  operator /  ( Real s ) const { return SinglePointData( position/s , weight/s , value/s , gradient/s ); }
-	SinglePointData& operator /= ( Real s ){ position /= s , weight /= s , value /= s , gradient /= s ; return *this; }
-	SinglePointData( void ) : SinglePointData< Real , false >() , gradient( Point3D< Real >() ) , _gradient( Point3D< Real >() ) { ; }
-	SinglePointData( Point3D< Real > p , Real v , Point3D< Real > g , Real w ) : SinglePointData< Real , false >( p , v , w ) { gradient = g , _gradient = Point3D< Real >(); }
-};
-
-#if POINT_DATA_RES
-template< class Real , bool HasGradients >
-struct PointData
-{
-	static const int RES = POINT_DATA_RES;
-	static const int SAMPLES = RES * RES * RES;
-
-	SinglePointData< Real , HasGradients > points[SAMPLES];
-	SinglePointData< Real , HasGradients >& operator[] ( int idx ) { return points[idx]; }
-	const SinglePointData< Real , HasGradients >& operator[] ( int idx ) const { return points[idx]; }
-
-	static void SetIndices( Point3D< Real > p , Point3D< Real > c , Real w , int x[3] )
-	{
-		for( int d=0 ; d<3 ; d++ ) x[d] = std::max< int >( 0 , std::min< int >( RES-1 , int( floor( ( p[d]-( c[d]-w/2 ) ) / w * RES ) ) ) );
-	}
-
-	void addPoint( SinglePointData< Real , HasGradients > p , Point3D< Real > center , Real width  )
-	{
-		int x[3];
-		SetIndices( p.position , center , width , x );
-		points[ x[0]+x[1]*RES+x[2]*RES*RES ] += p;
-	}
-
-	PointData  operator +  ( const PointData& p ) const { PointData _p ; for( int c=0 ; c<SAMPLES ;  c++ ) _p.points[c] = points[c] + _p.points[c] ; return _p; }
-	PointData& operator += ( const PointData& p ){ for( int c=0 ; c<SAMPLES ; c++ ) points[c] += p.points[c] ; return *this; }
-	PointData  operator *  ( Real s ) const { PointData _p ; for( int c=0 ; c<SAMPLES ;  c++ ) _p.points[c] = points[c] * s ; return _p; }
-	PointData& operator *= ( Real s ){ for( int c=0 ; c<SAMPLES ; c++ ) points[c] *= s ; return *this; }
-	PointData  operator /  ( Real s ) const { PointData _p ; for( int c=0 ; c<SAMPLES ;  c++ ) _p.points[c] = points[c] / s ; return _p; }
-	PointData& operator /= ( Real s ){ for( int c=0 ; c<SAMPLES ; c++ ) points[c] /= s ; return *this; }
-};
-#else // !POINT_DATA_RES
-template< class Real , bool HasGradients > using PointData = SinglePointData< Real , HasGradients >;
-#endif // POINT_DATA_RES
-
-template< class Data , int Degree >
-struct SparseNodeData
-{
-	size_t size( void ) const { return _data.size(); }
-	const Data& operator[] ( int idx ) const { return _data[idx]; }
-	Data& operator[] ( int idx ) { return _data[idx]; }
-	void reserve( size_t sz ){ if( sz>_indices.size() ) _indices.resize( sz , -1 ); }
-	Data* operator()( const OctNode< TreeNodeData >* node ){ return ( node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_indices.size() || _indices[ node->nodeData.nodeIndex ]<0 ) ? NULL : &_data[ _indices[ node->nodeData.nodeIndex ] ]; }
-	const Data* operator()( const OctNode< TreeNodeData >* node ) const { return ( node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_indices.size() || _indices[ node->nodeData.nodeIndex ]<0 ) ? NULL : &_data[ _indices[ node->nodeData.nodeIndex ] ]; }
-	Data& operator[]( const OctNode< TreeNodeData >* node )
-	{
-		if( node->nodeData.nodeIndex>=(int)_indices.size() ) _indices.resize( node->nodeData.nodeIndex+1 , -1 );
-		if( _indices[ node->nodeData.nodeIndex ]==-1 )
-		{
-			_indices[ node->nodeData.nodeIndex ] = (int)_data.size();
-			_data.push_back( Data() );
-		}
-		return _data[ _indices[ node->nodeData.nodeIndex ] ];
-	}
-	void remapIndices( const std::vector< int >& map )
-	{
-		std::vector< int > temp = _indices;
-		_indices.resize( map.size() );
-		for( size_t i=0 ; i<map.size() ; i++ )
-			if( map[i]<(int)temp.size() ) _indices[i] = temp[ map[i] ];
-			else                          _indices[i] = -1;
-	}
-	template< class _Data , int _Degree > friend struct SparseNodeData;
-	template< class _Data , int _Degree >
-	void init( const SparseNodeData< _Data , _Degree >& snd ){ _indices = snd._indices , _data.resize( snd._data.size() ); }
-	void remove( const OctNode< TreeNodeData >* node ){ if( node->nodeData.nodeIndex<(int)_indices.size() && node->nodeData.nodeIndex>=0 ) _indices[ node->nodeData.nodeIndex ] = -1; }
-protected:
-	std::vector< int > _indices;
-	std::vector< Data > _data;
-};
-template< class Data , int Degree >
-struct DenseNodeData
-{
-	DenseNodeData( void ){ _data = NullPointer( Data ) ; _sz = 0; }
-	DenseNodeData( size_t sz ){ _sz = sz ; if( sz ) _data = NewPointer< Data >( sz ) ; else _data = NullPointer( Data ); }
-	DenseNodeData( const DenseNodeData&  d ) : DenseNodeData() { _resize( d._sz ) ; if( _sz ) memcpy( _data , d._data , sizeof(Data) * _sz ); }
-	DenseNodeData(       DenseNodeData&& d ){ _data = d._data , _sz = d._sz ; d._data = NullPointer( Data ) , d._sz = 0; }
-	DenseNodeData& operator = ( const DenseNodeData&  d ){ _resize( d._sz ) ; if( _sz ) memcpy( _data , d._data , sizeof(Data) * _sz ) ; return *this; }
-	DenseNodeData& operator = (       DenseNodeData&& d ){ size_t __sz = _sz ; Pointer( Data ) __data = _data ; _data = d._data , _sz = d._sz ; d._data = __data , d._sz = __sz ; return *this; }
-	~DenseNodeData( void ){ DeletePointer( _data ) ; _sz = 0; }
-
-	Data& operator[] ( int idx ) { return _data[idx]; }
-	const Data& operator[] ( int idx ) const { return _data[idx]; }
-	size_t size( void ) const { return _sz; }
-	Data& operator[]( const OctNode< TreeNodeData >* node ) { return _data[ node->nodeData.nodeIndex ]; }
-	Data* operator()( const OctNode< TreeNodeData >* node ) { return ( node==NULL || node->nodeData.nodeIndex>=(int)_sz ) ? NULL : &_data[ node->nodeData.nodeIndex ]; }
-	const Data* operator()( const OctNode< TreeNodeData >* node ) const { return ( node==NULL || node->nodeData.nodeIndex>=(int)_sz ) ? NULL : &_data[ node->nodeData.nodeIndex ]; }
-	int index( const OctNode< TreeNodeData >* node ) const { return ( !node || node->nodeData.nodeIndex<0 || node->nodeData.nodeIndex>=(int)_data.size() ) ? -1 : node->nodeData.nodeIndex; }
-protected:
-	size_t _sz;
-	void _resize( size_t sz ){ DeletePointer( _data ) ; if( sz ) _data = NewPointer< Data >( sz ) ; else _data = NullPointer( Data ) ; _sz = sz; }
-	Pointer( Data ) _data;
-};
-
-// This is may be necessary in case the memory usage is larger than what fits on the stack
-template< class C , int N > struct Stencil
-{
-	Stencil( void ){ _values = NewPointer< C >( N * N * N ); }
-	~Stencil( void ){ DeletePointer( _values ); }
-	C& operator()( int i , int j , int k ){ return _values[ i*N*N + j*N + k ]; }
-	const C& operator()( int i , int j , int k ) const { return _values[ i*N*N + j*N + k ]; }
-protected:
-	Pointer( C ) _values;
-};
-
-template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 >
-class SystemCoefficients
-{
-	typedef typename BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIntegrator FunctionIntegrator;
-	static const int OverlapSize  = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapSize;
-	static const int OverlapStart = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapStart;
-	static const int OverlapEnd   = BSplineOverlapSizes< Degree1 , Degree2 >::OverlapEnd;
-public:
-	typedef typename BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIntegrator::template      Integrator< DERIVATIVES( Degree1 ) , DERIVATIVES( Degree2 ) >      Integrator;
-	typedef typename BSplineIntegrationData< Degree1 , BType1 , Degree2 , BType2 >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( Degree1 ) , DERIVATIVES( Degree2 ) > ChildIntegrator;
-
-	// The FEMSystemFunctor is a class that takes an object of type Integrator/ChildIntegrator, as well as a pair of indices of octree nodes
-	// and returns the corresponding system coefficient.
-	template< class _FEMSystemFunctor > static void SetCentralSystemStencil ( const _FEMSystemFunctor& F , const      Integrator& integrator , Stencil< double , OverlapSize >& stencil           );
-	template< class _FEMSystemFunctor > static void SetCentralSystemStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil< double , OverlapSize >  stencils[2][2][2] );
-	template< bool Reverse , class _FEMSystemFunctor > static void SetCentralConstraintStencil ( const _FEMSystemFunctor& F , const      Integrator& integrator , Stencil<          double   , OverlapSize >& stencil           );
-	template< bool Reverse , class _FEMSystemFunctor > static void SetCentralConstraintStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil<          double   , OverlapSize >  stencils[2][2][2] );
-	template< bool Reverse , class _FEMSystemFunctor > static void SetCentralConstraintStencil ( const _FEMSystemFunctor& F , const      Integrator& integrator , Stencil< Point3D< double > , OverlapSize >& stencil           );
-	template< bool Reverse , class _FEMSystemFunctor > static void SetCentralConstraintStencils( const _FEMSystemFunctor& F , const ChildIntegrator& integrator , Stencil< Point3D< double > , OverlapSize >  stencils[2][2][2] );
-};
-
-template< int FEMDegree , BoundaryType BType >
-struct FEMSystemFunctor
-{
-	double massWeight , lapWeight , biLapWeight;
-	FEMSystemFunctor( double mWeight=0 , double lWeight=0 , double bWeight=0 ) : massWeight( mWeight ) , lapWeight( lWeight ) , biLapWeight( bWeight ) { ; }
-	double integrate( const typename SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::     Integrator& integrator , const int off1[] , const int off2[] ) const { return _integrate( integrator , off1 , off2 ); }
-	double integrate( const typename SystemCoefficients< FEMDegree , BType , FEMDegree , BType >::ChildIntegrator& integrator , const int off1[] , const int off2[] ) const { return _integrate( integrator , off1 , off2 ); }
-	bool vanishesOnConstants( void ) const { return massWeight==0; }
-protected:
-	template< class I > double _integrate( const I& integrator , const int off1[] , const int off2[] ) const;
-};
-template< int SFDegree , BoundaryType SFBType , int FEMDegree , BoundaryType FEMBType >
-struct FEMSFConstraintFunctor
-{
-	double massWeight , lapWeight , biLapWeight;
-	FEMSFConstraintFunctor( double mWeight=0 , double lWeight=0 , double bWeight=0 ) : massWeight( mWeight ) , lapWeight( lWeight ) , biLapWeight( bWeight ) { ; }
-	template< bool Reverse >
-	double integrate( const typename SystemCoefficients< Reverse ? FEMDegree : SFDegree , Reverse ? FEMBType : SFBType , Reverse ? SFDegree : FEMDegree , Reverse ? SFBType : FEMBType >::     Integrator& integrator , const int off1[] , const int off2[] ) const { return _integrate< Reverse >( integrator , off1 , off2 ); }
-	template< bool Reverse >
-	double integrate( const typename SystemCoefficients< Reverse ? FEMDegree : SFDegree , Reverse ? FEMBType : SFBType , Reverse ? SFDegree : FEMDegree , Reverse ? SFBType : FEMBType >::ChildIntegrator& integrator , const int off1[] , const int off2[] ) const { return _integrate< Reverse >( integrator , off1 , off2 ); }
-protected:
-	template< bool Reverse , class I > double _integrate( const I& integrator , const int off1[] , const int off[2] ) const;
-};
-template< int VFDegree , BoundaryType VFBType , int FEMDegree , BoundaryType FEMBType >
-struct FEMVFConstraintFunctor
-{
-	double lapWeight , biLapWeight;
-	FEMVFConstraintFunctor( double lWeight=0 , double bWeight=0 ) : lapWeight( lWeight ) , biLapWeight( bWeight ) { ; }
-	template< bool Reverse >
-	Point3D< double > integrate( const typename SystemCoefficients< Reverse ? FEMDegree : VFDegree , Reverse ? FEMBType : VFBType , Reverse ? VFDegree : FEMDegree , Reverse ? VFBType : FEMBType >::     Integrator& integrator , const int off1[] , const int off2[] ) const { return _integrate< Reverse >( integrator , off1 , off2 ); }
-	template< bool Reverse >
-	Point3D< double > integrate( const typename SystemCoefficients< Reverse ? FEMDegree : VFDegree , Reverse ? FEMBType : VFBType , Reverse ? VFDegree : FEMDegree , Reverse ? VFBType : FEMBType >::ChildIntegrator& integrator , const int off1[] , const int off2[] ) const { return _integrate< Reverse >( integrator , off1 , off2 ); }
-protected:
-	template< bool Reverse , class I > Point3D< double > _integrate( const I& integrator , const int off1[] , const int off[2] ) const;
-};
-
-inline void SetGhostFlag( OctNode< TreeNodeData >* node , bool flag ){ if( node && node->parent ) node->parent->nodeData.setGhostFlag( flag ); }
-inline bool GetGhostFlag( const OctNode< TreeNodeData >* node ){ return node==NULL || node->parent==NULL || node->parent->nodeData.getGhostFlag( ); }
-inline bool IsActiveNode( const OctNode< TreeNodeData >* node ){ return !GetGhostFlag( node ); }
-
-template< class Real >
-class Octree
-{
-	typedef OctNode< TreeNodeData > TreeOctNode;
-	static int _NodeCount;
-	static void _NodeInitializer( TreeOctNode& node ){ node.nodeData.nodeIndex = _NodeCount++; }
-public:
-#if 0
-	struct LocalDepth
-	{
-		LocalDepth( int d=0 ) : _d(d) { ; }
-		operator int&()       { return _d; }
-		operator int () const { return _d; }
-	protected:
-		int _d;
-	};
-	struct LocalOffset
-	{
-		LocalOffset( const int* off=NULL ){ if( off ) memcpy( _off , off , sizeof(_off) ) ; else memset( _off , 0 , sizeof( _off ) ); }
-		operator        int*()       { return _off; }
-		operator const  int*() const { return _off; }
-	protected:
-		int _off[3];
-	};
-#else
-	typedef int LocalDepth;
-	typedef int LocalOffset[3];
-#endif
-
-	static void ResetNodeCount( void ){ _NodeCount = 0 ; }
-	static int NodeCount( void ){ return _NodeCount; }
-	template< int FEMDegree , BoundaryType BType > void functionIndex( const TreeOctNode* node , int idx[3] ) const;
-
-	struct PointSample{ const TreeOctNode* node ; ProjectiveData< OrientedPoint3D< Real > , Real > sample; };
-
-	typedef typename TreeOctNode::     NeighborKey< 1 , 1 >      AdjacenctNodeKey;
-	typedef typename TreeOctNode::ConstNeighborKey< 1 , 1 > ConstAdjacenctNodeKey;
-
-	template< int FEMDegree , BoundaryType BType > bool isValidFEMNode( const TreeOctNode* node ) const;
-	bool isValidSpaceNode( const TreeOctNode* node ) const;
-	TreeOctNode* leaf( Point3D< Real > p );
-	const TreeOctNode* leaf( Point3D< Real > p ) const;
-
-	template< bool HasGradients >
-	struct InterpolationInfo
-	{
-		SparseNodeData< PointData< Real , HasGradients > , 0 > iData;
-		Real valueWeight , gradientWeight;
-		InterpolationInfo( const class Octree< Real >& tree , const std::vector< PointSample >& samples , Real pointValue , int adaptiveExponent , Real v , Real g ) : valueWeight(v) , gradientWeight(g)
-		{ iData = tree._densifyInterpolationInfo< HasGradients >( samples , pointValue , adaptiveExponent ); }
-		PointData< Real , HasGradients >* operator()( const OctNode< TreeNodeData >* node ){ return iData(node); }
-		const PointData< Real , HasGradients >* operator()( const OctNode< TreeNodeData >* node ) const { return iData(node); }
-	};
-
-	template< int DensityDegree > struct DensityEstimator : public SparseNodeData< Real , DensityDegree >
-	{
-		DensityEstimator( int kernelDepth ) : _kernelDepth( kernelDepth ){ ; }
-		int kernelDepth( void ) const { return _kernelDepth; }
-	protected:
-		int _kernelDepth;
-	};
-protected:
-	bool _isValidSpaceNode( const TreeOctNode* node ) const { return !GetGhostFlag( node ) && ( node->nodeData.flags & TreeNodeData::SPACE_FLAG ); }
-	bool _isValidFEMNode( const TreeOctNode* node ) const { return !GetGhostFlag( node ) && ( node->nodeData.flags & TreeNodeData::FEM_FLAG ); }
-
-	TreeOctNode* _tree;
-	TreeOctNode* _spaceRoot;
-	SortedTreeNodes _sNodes;
-	LocalDepth _fullDepth , _maxDepth;
-
-	static bool _InBounds( Point3D< Real > p );
-
-	int _depthOffset;
-	int _localToGlobal( LocalDepth d ) const { return d + _depthOffset; }
-	LocalDepth _localDepth( const TreeOctNode* node ) const { return node->depth() - _depthOffset; }
-	LocalDepth _localMaxDepth( const TreeOctNode* tree ) const { return tree->maxDepth() - _depthOffset; }
-	int _localInset( LocalDepth d ) const { return _depthOffset<=1 ? 0 : 1<<( d + _depthOffset - 1 ); }
-	void _localDepthAndOffset( const TreeOctNode* node , LocalDepth& d , LocalOffset& off ) const
-	{
-		node->depthAndOffset( d , off ) ; d -= _depthOffset;
-		int inset = _localInset( d );
-		off[0] -= inset , off[1] -= inset , off[2] -= inset;
-	}
-	template< int FEMDegree , BoundaryType BType > static int _BSplineBegin( LocalDepth depth ){ return BSplineEvaluationData< FEMDegree , BType >::Begin( depth ); }
-	template< int FEMDegree , BoundaryType BType > static int _BSplineEnd  ( LocalDepth depth ){ return BSplineEvaluationData< FEMDegree , BType >::End  ( depth ); }
-	template< int FEMDegree , BoundaryType BType >
-	bool _outOfBounds( const TreeOctNode* node ) const
-	{
-		if( !node ) return true;
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		return d<0 || BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[0] ) || BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[1] ) || BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[2] );
-	}
-	int _sNodesBegin( LocalDepth d ) const { return _sNodes.begin( _localToGlobal( d ) ); }
-	int _sNodesEnd  ( LocalDepth d ) const { return _sNodes.end  ( _localToGlobal( d ) ); }
-	int _sNodesSize ( LocalDepth d ) const { return _sNodes.size ( _localToGlobal( d ) ); }
-	int _sNodesBegin( LocalDepth d , int slice ) const { return _sNodes.begin( _localToGlobal( d ) , slice + _localInset( d ) ); }
-	int _sNodesEnd  ( LocalDepth d , int slice ) const { return _sNodes.end  ( _localToGlobal( d ) , slice + _localInset( d ) ); }
-	int _sNodesSize ( LocalDepth d , int slice ) const { return _sNodes.size ( _localToGlobal( d ) , slice + _localInset( d ) ); }
-
-	template< int FEMDegree > static bool _IsInteriorlySupported( LocalDepth depth , const LocalOffset off )
-	{
-		if( depth>=0 )
-		{
-			int begin , end;
-			BSplineSupportSizes< FEMDegree >::InteriorSupportedSpan( depth , begin , end );
-			return ( off[0]>=begin && off[0]<end && off[1]>=begin && off[1]<end && off[2]>=begin && off[2]<end );
-		}
-		else return false;
-	}
-	template< int FEMDegree > bool _isInteriorlySupported( const TreeOctNode* node ) const
-	{
-		if( !node ) return false;
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		return _IsInteriorlySupported< FEMDegree >( d , off );
-	}
-	template< int FEMDegree1 , int FEMDegree2 > static bool _IsInteriorlyOverlapped( LocalDepth depth , const LocalOffset off )
-	{
-		if( depth>=0 )
-		{
-			int begin , end;
-			BSplineIntegrationData< FEMDegree1 , BOUNDARY_NEUMANN , FEMDegree2 , BOUNDARY_NEUMANN >::InteriorOverlappedSpan( depth , begin , end );
-			return ( off[0]>=begin && off[0]<end && off[1]>=begin && off[1]<end && off[2]>=begin && off[2]<end );
-		}
-		else return false;
-	}
-	template< int FEMDegree1 , int FEMDegree2 > bool _isInteriorlyOverlapped( const TreeOctNode* node ) const
-	{
-		if( !node ) return false;
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		return _IsInteriorlyOverlapped< FEMDegree1 , FEMDegree2 >( d , off );
-	}
-	void _startAndWidth( const TreeOctNode* node , Point3D< Real >& start , Real& width ) const
-	{
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		if( d>=0 ) width = Real( 1.0 / (1<<  d ) );
-		else       width = Real( 1.0 * (1<<(-d)) );
-		for( int dd=0 ; dd<DIMENSION ; dd++ ) start[dd] = Real( off[dd] ) * width;
-	}
-	void _centerAndWidth( const TreeOctNode* node , Point3D< Real >& center , Real& width ) const
-	{
-		int d , off[3];
-		_localDepthAndOffset( node , d , off );
-		width = Real( 1.0 / (1<<d) );
-		for( int dd=0 ; dd<DIMENSION ; dd++ ) center[dd] = Real( off[dd] + 0.5 ) * width;
-	}
-	int _childIndex( const TreeOctNode* node , Point3D< Real > p ) const
-	{
-		Point3D< Real > c ; Real w;
-		_centerAndWidth( node , c , w );
-		return ( p[0]<c[0] ? 0 : 1 ) | ( p[1]<c[1] ? 0 : 2 ) | ( p[2]<c[2] ? 0 : 4 );
-	}
-
-	template< int Degree , BoundaryType BType > void _setFullDepth( TreeOctNode* node , LocalDepth depth ) const;
-	template< int Degree , BoundaryType BType > void _setFullDepth( LocalDepth depth );
-
-	template< int LeftRadius , int RightRadius >
-	static typename TreeOctNode::ConstNeighbors< LeftRadius + RightRadius + 1 >& _neighbors( TreeOctNode::ConstNeighborKey< LeftRadius , RightRadius >& key , const TreeOctNode* node ){ return key.neighbors[ node->depth() ]; }
-	template< int LeftRadius , int RightRadius >
-	static typename TreeOctNode::Neighbors< LeftRadius + RightRadius + 1 >& _neighbors( TreeOctNode::NeighborKey< LeftRadius , RightRadius >& key , const TreeOctNode* node ){ return key.neighbors[ node->depth() ]; }
-	template< int LeftRadius , int RightRadius >
-	static const typename TreeOctNode::template Neighbors< LeftRadius + RightRadius + 1 >& _neighbors( const typename TreeOctNode::template NeighborKey< LeftRadius , RightRadius >& key , const TreeOctNode* node ){ return key.neighbors[ node->depth() ]; }
-	template< int LeftRadius , int RightRadius >
-	static const typename TreeOctNode::template ConstNeighbors< LeftRadius + RightRadius + 1 >& _neighbors( const typename TreeOctNode::template ConstNeighborKey< LeftRadius , RightRadius >& key , const TreeOctNode* node ){ return key.neighbors[ node->depth() ]; }
-
-public:
-	LocalDepth depth( const TreeOctNode* node ) const { return _localDepth( node ); }
-	void depthAndOffset( const TreeOctNode* node , LocalDepth& depth , LocalOffset& offset ) const { _localDepthAndOffset( node , depth , offset ); }
-
-	int nodesBegin( LocalDepth d ) const { return _sNodes.begin( _localToGlobal( d ) ); }
-	int nodesEnd  ( LocalDepth d ) const { return _sNodes.end  ( _localToGlobal( d ) ); }
-	int nodesSize ( LocalDepth d ) const { return _sNodes.size ( _localToGlobal( d ) ); }
-	int nodesBegin( LocalDepth d , int slice ) const { return _sNodes.begin( _localToGlobal( d ) , slice + _localInset( d ) ); }
-	int nodesEnd  ( LocalDepth d , int slice ) const { return _sNodes.end  ( _localToGlobal( d ) , slice + _localInset( d ) ); }
-	int nodesSize ( LocalDepth d , int slice ) const { return _sNodes.size ( _localToGlobal( d ) , slice + _localInset( d ) ); }
-	const TreeOctNode* node( int idx ) const { return _sNodes.treeNodes[idx]; }
-protected:
-
-	////////////////////////////////////
-	// System construction code       //
-	// MultiGridOctreeData.System.inl //
-	////////////////////////////////////
-	template< int FEMDegree >
-	void _setMultiColorIndices( int start , int end , std::vector< std::vector< int > >& indices ) const;
-	struct _SolverStats
-	{
-		double evaluateTime , systemTime , solveTime;
-		double bNorm2 , inRNorm2 , outRNorm2;
-	};
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	int _solveSystemGS( const FEMSystemFunctor& F , const BSplineData< FEMDegree , BType >& bsData , InterpolationInfo< HasGradients >* interpolationInfo , LocalDepth depth , DenseNodeData< Real , FEMDegree >& solution , DenseNodeData< Real , FEMDegree >& constraints , DenseNodeData< Real , FEMDegree >& metSolutionConstraints , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms );
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	int _solveSystemCG( const FEMSystemFunctor& F , const BSplineData< FEMDegree , BType >& bsData , InterpolationInfo< HasGradients >* interpolationInfo , LocalDepth depth , DenseNodeData< Real , FEMDegree >& solution , DenseNodeData< Real , FEMDegree >& constraints , DenseNodeData< Real , FEMDegree >& metSolutionConstraints , int iters , bool coarseToFine , _SolverStats& stats , bool computeNorms , double accuracy );
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	int _setMatrixRow( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors , Pointer( MatrixEntry< Real > ) row , int offset , const typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , const Stencil< double , BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& stencil , const BSplineData< FEMDegree , BType >& bsData ) const;
-	template< int FEMDegree , BoundaryType BType >
-	int _getMatrixRowSize( const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors ) const;
-
-	template< int FEMDegree1 , int FEMDegree2 > static void _SetParentOverlapBounds( const TreeOctNode* node , int& startX , int& endX , int& startY , int& endY , int& startZ , int& endZ );
-	// Updates the constraints @(depth) based on the solution coefficients @(depth-1)
-
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	void _updateConstraintsFromCoarser( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& neighbors , const typename TreeOctNode::Neighbors< BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& pNeighbors , TreeOctNode* node , DenseNodeData< Real , FEMDegree >& constraints , const DenseNodeData< Real , FEMDegree >& metSolution , const typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const Stencil< double , BSplineOverlapSizes< FEMDegree , FEMDegree >::OverlapSize >& stencil , const BSplineData< FEMDegree , BType >& bsData ) const;
-
-	// evaluate the points @(depth) using coefficients @(depth-1)
-	template< int FEMDegree , BoundaryType BType , bool HasGradients >
-	void _setPointValuesFromCoarser( InterpolationInfo< HasGradients >& interpolationInfo , LocalDepth highDepth , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients );
-
-	// Updates the cumulative integral constraints @(depth-1) based on the change in solution coefficients @(depth)
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor >
-	void _updateCumulativeIntegralConstraintsFromFiner( const FEMSystemFunctor& F , 
-		const BSplineData< FEMDegree , BType >& bsData , LocalDepth highDepth , const DenseNodeData< Real , FEMDegree >& fineSolution , DenseNodeData< Real , FEMDegree >& cumulativeConstraints ) const;
-	// Updates the cumulative interpolation constraints @(depth-1) based on the change in solution coefficient @(depth)
-	template< int FEMDegree , BoundaryType BType , bool HasGradients >
-	void _updateCumulativeInterpolationConstraintsFromFiner( const InterpolationInfo< HasGradients >& interpolationInfo ,
-		const BSplineData< FEMDegree , BType >& bsData , LocalDepth highDepth , const DenseNodeData< Real , FEMDegree >& fineSolution , DenseNodeData< Real , FEMDegree >& cumulativeConstraints ) const;
-
-	template< int FEMDegree , BoundaryType BType >
-	Real _coarserFunctionValue( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients ) const;
-	template< int FEMDegree , BoundaryType BType >
-	Point3D< Real > _coarserFunctionGradient( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& upSampledCoefficients ) const;
-	template< int FEMDegree , BoundaryType BType >
-	Real   _finerFunctionValue( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& coefficients ) const;
-	template< int FEMDegree , BoundaryType BType >
-	Point3D< Real >   _finerFunctionGradient( Point3D< Real > p , const PointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , const BSplineData< FEMDegree , BType >& bsData , const DenseNodeData< Real , FEMDegree >& coefficients ) const;
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	int _getSliceMatrixAndUpdateConstraints( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , SparseMatrix< Real >& matrix , DenseNodeData< Real , FEMDegree >& constraints , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const BSplineData< FEMDegree , BType >& bsData , LocalDepth depth , int slice , const DenseNodeData< Real , FEMDegree >& metSolution , bool coarseToFine );
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	int _getMatrixAndUpdateConstraints( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , SparseMatrix< Real >& matrix , DenseNodeData< Real , FEMDegree >& constraints , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template Integrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& integrator , typename BSplineIntegrationData< FEMDegree , BType , FEMDegree , BType >::FunctionIntegrator::template ChildIntegrator< DERIVATIVES( FEMDegree ) , DERIVATIVES( FEMDegree ) >& childIntegrator , const BSplineData< FEMDegree , BType >& bsData , LocalDepth depth , const DenseNodeData< Real , FEMDegree >& metSolution , bool coarseToFine );
-
-	// Down samples constraints @(depth) to constraints @(depth-1)
-	template< class C , int FEMDegree , BoundaryType BType > void _downSample( LocalDepth highDepth , DenseNodeData< C , FEMDegree >& constraints ) const;
-	// Up samples coefficients @(depth-1) to coefficients @(depth)
-	template< class C , int FEMDegree , BoundaryType BType > void _upSample( LocalDepth highDepth , DenseNodeData< C , FEMDegree >& coefficients ) const;
-	template< class C , int FEMDegree , BoundaryType BType > static void _UpSample( LocalDepth highDepth , ConstPointer( C ) lowCoefficients , Pointer( C ) highCoefficients , int threads );
-public:
-	template< class C , int FEMDegree , BoundaryType BType > DenseNodeData< C , FEMDegree > coarseCoefficients( const  DenseNodeData< C , FEMDegree >& coefficients ) const;
-	template< class C , int FEMDegree , BoundaryType BType > DenseNodeData< C , FEMDegree > coarseCoefficients( const SparseNodeData< C , FEMDegree >& coefficients ) const;
-protected:
-
-	/////////////////////////////////////////////
-	// Code for splatting point-sample data    //
-	// MultiGridOctreeData.WeightedSamples.inl //
-	/////////////////////////////////////////////
-	template< int WeightDegree >
-	void _addWeightContribution( DensityEstimator< WeightDegree >& densityWeights , TreeOctNode* node , Point3D< Real > position , PointSupportKey< WeightDegree >& weightKey , Real weight=Real(1.0) );
-	template< int WeightDegree , class PointSupportKey >
-	Real _getSamplesPerNode( const DensityEstimator< WeightDegree >& densityWeights , const TreeOctNode* node , Point3D< Real > position , PointSupportKey& weightKey ) const;
-	template< int WeightDegree , class PointSupportKey >
-	void _getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , const TreeOctNode* node , Point3D< Real > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const;
-	template< int WeightDegree , class PointSupportKey >
-	void _getSampleDepthAndWeight( const DensityEstimator< WeightDegree >& densityWeights , Point3D< Real > position , PointSupportKey& weightKey , Real& depth , Real& weight ) const;
-	template< bool CreateNodes ,                    int DataDegree , class V > void      _splatPointData( TreeOctNode* node ,                                           Point3D< Real > point , V v , SparseNodeData< V , DataDegree >& data ,                                              PointSupportKey< DataDegree >& dataKey                                                   );
-	template< bool CreateNodes , int WeightDegree , int DataDegree , class V > Real      _splatPointData( const DensityEstimator< WeightDegree >& densityWeights , Point3D< Real > point , V v , SparseNodeData< V , DataDegree >& data , PointSupportKey< WeightDegree >& weightKey , PointSupportKey< DataDegree >& dataKey , LocalDepth minDepth , LocalDepth maxDepth , int dim=DIMENSION );
-	template< bool CreateNodes , int WeightDegree , int DataDegree , class V > Real _multiSplatPointData( const DensityEstimator< WeightDegree >* densityWeights , TreeOctNode* node , Point3D< Real > point , V v , SparseNodeData< V , DataDegree >& data , PointSupportKey< WeightDegree >& weightKey , PointSupportKey< DataDegree >& dataKey , int dim=DIMENSION );
-	template< class V , int DataDegree , BoundaryType BType , class Coefficients > V _evaluate( const Coefficients& coefficients , Point3D< Real > p , const BSplineData< DataDegree , BType >& bsData , const ConstPointSupportKey< DataDegree >& dataKey ) const;
-public:
-	template< class V , int DataDegree , BoundaryType BType > Pointer( V ) voxelEvaluate( const DenseNodeData< V , DataDegree >& coefficients , int& res , Real isoValue=0.f , LocalDepth depth=-1 , bool primal=false );
-
-	template< int NormalDegree >
-	struct HasNormalDataFunctor
-	{
-		const SparseNodeData< Point3D< Real > , NormalDegree >& normalInfo;
-		HasNormalDataFunctor( const SparseNodeData< Point3D< Real > , NormalDegree >& ni ) : normalInfo( ni ){ ; }
-		bool operator() ( const TreeOctNode* node ) const
-		{
-			const Point3D< Real >* n = normalInfo( node );
-			if( n )
-			{
-				const Point3D< Real >& normal = *n;
-				if( normal[0]!=0 || normal[1]!=0 || normal[2]!=0 ) return true;
-			}
-			if( node->children ) for( int c=0 ; c<Cube::CORNERS ; c++ ) if( (*this)( node->children + c ) ) return true;
-			return false;
-		}
-	};
-	struct TrivialHasDataFunctor{ bool operator() ( const TreeOctNode* node ) const{ return true; } };
-
-	// [NOTE] The input/output for this method is pre-scaled by weight
-	template< bool HasGradients > bool _setInterpolationInfoFromChildren( TreeOctNode* node , SparseNodeData< PointData< Real , HasGradients > , 0 >& iInfo ) const;
-	template< bool HasGradients > SparseNodeData< PointData< Real , HasGradients > , 0 > _densifyInterpolationInfo( const std::vector< PointSample >& samples , Real pointValue , int adaptiveExponent ) const;
-
-	template< int FEMDegree , BoundaryType BType > void _setValidityFlags( void );
-	template< class HasDataFunctor > void _clipTree( const HasDataFunctor& f );
-
-	template< int FEMDegree , BoundaryType BType > SparseNodeData<          Real   , 0 > leafValues   ( const DenseNodeData< Real , FEMDegree >& coefficients ) const;
-	template< int FEMDegree , BoundaryType BType > SparseNodeData< Point3D< Real > , 0 > leafGradients( const DenseNodeData< Real , FEMDegree >& coefficients ) const;
-
-	////////////////////////////////////
-	// Evaluation Methods             //
-	// MultiGridOctreeData.Evaluation //
-	////////////////////////////////////
-	static const int CHILDREN = Cube::CORNERS;
-	template< int FEMDegree , BoundaryType BType >
-	struct _Evaluator
-	{
-		typename BSplineEvaluationData< FEMDegree , BType >::Evaluator evaluator;
-		typename BSplineEvaluationData< FEMDegree , BType >::ChildEvaluator childEvaluator;
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > cellStencil;
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > cellStencils  [CHILDREN];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > edgeStencil             [Cube::EDGES  ];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > edgeStencils  [CHILDREN][Cube::EDGES  ];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > faceStencil             [Cube::FACES  ];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > faceStencils  [CHILDREN][Cube::FACES  ];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > cornerStencil           [Cube::CORNERS];
-		Stencil< double , BSplineSupportSizes< FEMDegree >::SupportSize > cornerStencils[CHILDREN][Cube::CORNERS];
-
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dCellStencil;
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dCellStencils  [CHILDREN];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dEdgeStencil             [Cube::EDGES  ];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dEdgeStencils  [CHILDREN][Cube::EDGES  ];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dFaceStencil             [Cube::FACES  ];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dFaceStencils  [CHILDREN][Cube::FACES  ];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dCornerStencil           [Cube::CORNERS];
-		Stencil< Point3D< double > , BSplineSupportSizes< FEMDegree >::SupportSize > dCornerStencils[CHILDREN][Cube::CORNERS];
-
-		void set( LocalDepth depth );
-		_Evaluator( void ){ _bsData = NULL; }
-		~_Evaluator( void ){ if( _bsData ) delete _bsData , _bsData = NULL; }
-	protected:
-		BSplineData< FEMDegree , BType >* _bsData;
-		friend Octree;
-	};
-	template< class V , int FEMDegree , BoundaryType BType >
-	V _getCenterValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node ,                     const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< class V , int FEMDegree , BoundaryType BType >
-	V _getCornerValue( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int corner        , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< class V , int FEMDegree , BoundaryType BType >
-	V _getEdgeValue  ( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int edge          , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< class V , int FEMDegree , BoundaryType BType >
-	V _getValue      ( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , Point3D< Real > p , const DenseNodeData< V , FEMDegree >& solution , const DenseNodeData< V , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator ) const;
-
-	template< int FEMDegree , BoundaryType BType >
-	std::pair< Real , Point3D< Real > > _getCenterValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node ,                     const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< int FEMDegree , BoundaryType BType >
-	std::pair< Real , Point3D< Real > > _getCornerValueAndGradient( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int corner        , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< int FEMDegree , BoundaryType BType >
-	std::pair< Real , Point3D< Real > > _getEdgeValueAndGradient  ( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , int edge          , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator , bool isInterior ) const;
-	template< int FEMDegree , BoundaryType BType >
-	std::pair< Real , Point3D< Real > > _getValueAndGradient      ( const ConstPointSupportKey< FEMDegree >& neighborKey , const TreeOctNode* node , Point3D< Real > p , const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , const _Evaluator< FEMDegree , BType >& evaluator ) const;
-
-public:
-	template< int Degree , BoundaryType BType >
-	class MultiThreadedEvaluator
-	{
-		const Octree* _tree;
-		int _threads;
-		std::vector< ConstPointSupportKey< Degree > > _neighborKeys;
-		_Evaluator< Degree , BType > _evaluator;
-		const DenseNodeData< Real , Degree >& _coefficients;
-		DenseNodeData< Real , Degree > _coarseCoefficients;
-	public:
-		MultiThreadedEvaluator( const Octree* tree , const DenseNodeData< Real , Degree >& coefficients , int threads=1 );
-		Real value( Point3D< Real > p , int thread=0 , const TreeOctNode* node=NULL );
-		std::pair< Real , Point3D< Real > > valueAndGradient( Point3D< Real > , int thread=0 , const TreeOctNode* node=NULL );
-	};
-
-	////////////////////////////////////////
-	// Iso-Surfacing Methods              //
-	// MultiGridOctreeData.IsoSurface.inl //
-	////////////////////////////////////////
-protected:
-	struct _IsoEdge
-	{
-		long long edges[2];
-		_IsoEdge( void ){ edges[0] = edges[1] = 0; }
-		_IsoEdge( long long v1 , long long v2 ){ edges[0] = v1 , edges[1] = v2; }
-		long long& operator[]( int idx ){ return edges[idx]; }
-		const long long& operator[]( int idx ) const { return edges[idx]; }
-	};
-	struct _FaceEdges
-	{
-		_IsoEdge edges[2];
-		int count;
-	};
-	template< class Vertex >
-	struct _SliceValues
-	{
-		typename SortedTreeNodes::SliceTableData sliceData;
-		Pointer( Real ) cornerValues ; Pointer( Point3D< Real > ) cornerGradients ; Pointer( char ) cornerSet;
-		Pointer( long long ) edgeKeys ; Pointer( char ) edgeSet;
-		Pointer( _FaceEdges ) faceEdges ; Pointer( char ) faceSet;
-		Pointer( char ) mcIndices;
-		std::unordered_map< long long, std::vector< _IsoEdge > > faceEdgeMap;
-		std::unordered_map< long long, std::pair< int, Vertex > > edgeVertexMap;
-		std::unordered_map< long long, long long > vertexPairMap;
-
-		_SliceValues( void );
-		~_SliceValues( void );
-		void reset( bool nonLinearFit );
-	protected:
-		int _oldCCount , _oldECount , _oldFCount , _oldNCount;
-	};
-	template< class Vertex >
-	struct _XSliceValues
-	{
-		typename SortedTreeNodes::XSliceTableData xSliceData;
-		Pointer( long long ) edgeKeys ; Pointer( char ) edgeSet;
-		Pointer( _FaceEdges ) faceEdges ; Pointer( char ) faceSet;
-		std::unordered_map< long long, std::vector< _IsoEdge > > faceEdgeMap;
-		std::unordered_map< long long, std::pair< int, Vertex > > edgeVertexMap;
-		std::unordered_map< long long, long long > vertexPairMap;
-
-		_XSliceValues( void );
-		~_XSliceValues( void );
-		void reset( void );
-	protected:
-		int _oldECount , _oldFCount;
-	};
-	template< class Vertex >
-	struct _SlabValues
-	{
-	protected:
-		_XSliceValues< Vertex > _xSliceValues[2];
-		_SliceValues< Vertex > _sliceValues[2];
-	public:
-		_SliceValues< Vertex >& sliceValues( int idx ){ return _sliceValues[idx&1]; }
-		const _SliceValues< Vertex >& sliceValues( int idx ) const { return _sliceValues[idx&1]; }
-		_XSliceValues< Vertex >& xSliceValues( int idx ){ return _xSliceValues[idx&1]; }
-		const _XSliceValues< Vertex >& xSliceValues( int idx ) const { return _xSliceValues[idx&1]; }
-	};
-	template< class Vertex , int FEMDegree , BoundaryType BType >
-	void _setSliceIsoCorners( const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , Real isoValue , LocalDepth depth , int slice ,         std::vector< _SlabValues< Vertex > >& sValues , const _Evaluator< FEMDegree , BType >& evaluator , int threads );
-	template< class Vertex , int FEMDegree , BoundaryType BType >
-	void _setSliceIsoCorners( const DenseNodeData< Real , FEMDegree >& solution , const DenseNodeData< Real , FEMDegree >& coarseSolution , Real isoValue , LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& sValues , const _Evaluator< FEMDegree , BType >& evaluator , int threads );
-	template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-	void _setSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slice ,         int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& sValues , int threads );
-	template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-	void _setSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slice , int z , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& sValues , int threads );
-	template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-	void _setXSliceIsoVertices( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , LocalDepth depth , int slab , int& vOffset , CoredMeshData< Vertex >& mesh , std::vector< _SlabValues< Vertex > >& sValues , int threads );
-	template< class Vertex >
-	void _setSliceIsoEdges( LocalDepth depth , int slice ,         std::vector< _SlabValues< Vertex > >& slabValues , int threads );
-	template< class Vertex >
-	void _setSliceIsoEdges( LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& slabValues , int threads );
-	template< class Vertex >
-	void _setXSliceIsoEdges( LocalDepth depth , int slice , std::vector< _SlabValues< Vertex > >& slabValues , int threads );
-	template< class Vertex >
-	void _copyFinerSliceIsoEdgeKeys( LocalDepth depth , int slice ,         std::vector< _SlabValues< Vertex > >& sValues , int threads );
-	template< class Vertex >
-	void _copyFinerSliceIsoEdgeKeys( LocalDepth depth , int slice , int z , std::vector< _SlabValues< Vertex > >& sValues , int threads );
-	template< class Vertex >
-	void _copyFinerXSliceIsoEdgeKeys( LocalDepth depth , int slab , std::vector< _SlabValues< Vertex > >& sValues , int threads );
-
-	template< class Vertex >
-	void _setIsoSurface( LocalDepth depth , int offset , const _SliceValues< Vertex >& bValues , const _SliceValues< Vertex >& fValues , const _XSliceValues< Vertex >& xValues , CoredMeshData< Vertex >& mesh , bool polygonMesh , bool addBarycenter , int& vOffset , int threads );
-
-	template< class Vertex >
-	static int _addIsoPolygons( CoredMeshData< Vertex >& mesh , std::vector< std::pair< int , Vertex > >& polygon , bool polygonMesh , bool addBarycenter , int& vOffset );
-
-	template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-	bool _getIsoVertex( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , ConstPointSupportKey< WeightDegree >& weightKey , ConstPointSupportKey< ColorDegree >& colorKey , const TreeOctNode* node , int edgeIndex , int z , const _SliceValues< Vertex >& sValues , Vertex& vertex );
-	template< int WeightDegree , int ColorDegree , BoundaryType BType , class Vertex >
-	bool _getIsoVertex( const BSplineData< ColorDegree , BType >* colorBSData , const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , Real isoValue , ConstPointSupportKey< WeightDegree >& weightKey , ConstPointSupportKey< ColorDegree >& colorKey , const TreeOctNode* node , int cornerIndex , const _SliceValues< Vertex >& bValues , const _SliceValues< Vertex >& fValues , Vertex& vertex );
-
-	void _init( TreeOctNode* node , LocalDepth maxDepth , bool (*Refine)( LocalDepth d , LocalOffset off ) );
-
-	double _maxMemoryUsage , _localMemoryUsage;
-public:
-	int threads;
-	double maxMemoryUsage( void ) const { return _maxMemoryUsage; }
-	double localMemoryUsage( void ) const { return _localMemoryUsage; }
-	void resetLocalMemoryUsage( void ){ _localMemoryUsage = 0; }
-	double memoryUsage( void );
-
-	Octree( void );
-
-	void init( LocalDepth maxDepth , bool (*Refine)( LocalDepth d , LocalOffset off ) );
-	template< class Data >
-	int init( OrientedPointStream< Real >& pointStream , LocalDepth maxDepth , bool useConfidence , std::vector< PointSample >& samples , std::vector< ProjectiveData< Data , Real > >* sampleData );
-	template< int DensityDegree >
-	typename Octree::template DensityEstimator< DensityDegree >* setDensityEstimator( const std::vector< PointSample >& samples , LocalDepth splatDepth , Real samplesPerNode );
-	template< int NormalDegree , int DensityDegree >
-	SparseNodeData< Point3D< Real > , NormalDegree > setNormalField( const std::vector< PointSample >& samples , const DensityEstimator< DensityDegree >& density , Real& pointWeightSum , bool forceNeumann );
-	template< int DataDegree , bool CreateNodes , int DensityDegree , class Data >
-	SparseNodeData< ProjectiveData< Data , Real > , DataDegree > setDataField( const std::vector< PointSample >& samples , std::vector< ProjectiveData< Data , Real > >& sampleData , const DensityEstimator< DensityDegree >* density );
-	template< int MaxDegree , int FEMDegree , BoundaryType FEMBType , class HasDataFunctor > void inalizeForBroodedMultigrid( LocalDepth fullDepth , const HasDataFunctor& F , std::vector< int >* map=NULL );
-
-	// Generate an empty set of constraints
-	template< int FEMDegree > DenseNodeData< Real , FEMDegree > initDenseNodeData( void );
-
-	// Add finite-elements constraints (derived from a sparse scalar field)
-	template< int FEMDegree , BoundaryType FEMBType , int SFDegree , BoundaryType SFBType , class FEMSFConstraintFunctor > void addFEMConstraints( const FEMSFConstraintFunctor& F , const SparseNodeData< Real , SFDegree >& sfCoefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-	{ return _addFEMConstraints< FEMDegree , FEMBType , SFDegree , SFBType , FEMSFConstraintFunctor , const SparseNodeData< Real   , SFDegree > , Real , double >( F , sfCoefficients , constraints , maxDepth ); }
-	// Add finite-elements constraints (derived from a dense scalar field)
-	template< int FEMDegree , BoundaryType FEMBType , int SFDegree , BoundaryType SFBType , class FEMSFConstraintFunctor > void addFEMConstraints( const FEMSFConstraintFunctor& F , const  DenseNodeData< Real , SFDegree >& sfCoefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-	{ return _addFEMConstraints< FEMDegree , FEMBType , SFDegree , SFBType , FEMSFConstraintFunctor , const  DenseNodeData< Real   , SFDegree > , Real , double >( F , sfCoefficients , constraints , maxDepth ); }
-	// Add finite-elements constraints (derived from a sparse vector field)
-	template< int FEMDegree , BoundaryType FEMBType , int VFDegree , BoundaryType VFBType , class FEMVFConstraintFunctor > void addFEMConstraints( const FEMVFConstraintFunctor& F , const SparseNodeData< Point3D< Real > , VFDegree >& vfCoefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-	{ return _addFEMConstraints< FEMDegree , FEMBType , VFDegree , VFBType , FEMVFConstraintFunctor , const SparseNodeData< Point3D< Real > , VFDegree > , Point3D< Real > , Point3D< double > >( F , vfCoefficients , constraints , maxDepth ); }
-	// Add finite-elements constraints (derived from a dense vector field)
-	template< int FEMDegree , BoundaryType FEMBType , int VFDegree , BoundaryType VFBType , class FEMVFConstraintFunctor > void addFEMConstraints( const FEMVFConstraintFunctor& F , const  DenseNodeData< Point3D< Real > , VFDegree >& vfCoefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth )
-	{ return _addFEMConstraints< FEMDegree , FEMBType , VFDegree , VFBType , FEMVFConstraintFunctor , const  DenseNodeData< Point3D< Real > , VFDegree > , Point3D< Real > , Point3D< double > >( F , vfCoefficients , constraints , maxDepth ); }
-	// Add interpolation constraints
-	template< int FEMDegree , BoundaryType FEMBType , bool HasGradients > void addInterpolationConstraints( const InterpolationInfo< HasGradients >& interpolationInfo , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth );
-
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor > double dot( const DotFunctor& F , const SparseNodeData< Real , Degree1 >& coefficients1 , const SparseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , false >( F , (const InterpolationInfo< false >*)NULL , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor > double dot( const DotFunctor& F , const SparseNodeData< Real , Degree1 >& coefficients1 , const DenseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , false >( F , (const InterpolationInfo< false >*)NULL , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor > double dot( const DotFunctor& F , const DenseNodeData< Real , Degree1 >& coefficients1 , const SparseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , false >( F , (const InterpolationInfo< false >*)NULL , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor > double dot( const DotFunctor& F , const DenseNodeData< Real , Degree1 >& coefficients1 , const DenseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , false >( F , (const InterpolationInfo< false >*)NULL , coefficients1 , coefficients2 ); }
-
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor , bool HasGradients > double dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const SparseNodeData< Real , Degree1 >& coefficients1 , const SparseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , HasGradients >( F , iInfo , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor , bool HasGradients > double dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const SparseNodeData< Real , Degree1 >& coefficients1 , const DenseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , HasGradients >( F , iInfo , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor , bool HasGradients > double dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const DenseNodeData< Real , Degree1 >& coefficients1 , const SparseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , HasGradients >( F , iInfo , coefficients1 , coefficients2 ); }
-	template< int Degree1 , BoundaryType BType1 , int Degree2 , BoundaryType BType2 , class DotFunctor , bool HasGradients > double dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const DenseNodeData< Real , Degree1 >& coefficients1 , const DenseNodeData< Real , Degree2 >& coefficients2 ) const
-	{ return _dot< Degree1 , BType1 , Degree2 , BType2 , DotFunctor , HasGradients >( F , iInfo , coefficients1 , coefficients2 ); }
-
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	void setSystemMatrix( const FEMSystemFunctor& F , const InterpolationInfo< HasGradients >* interpolationInfo , LocalDepth depth , SparseMatrix< Real >& matrix ) const;
-
-	// Solve the linear system
-	struct SolverInfo
-	{
-		// How to solve
-		LocalDepth cgDepth;
-		int iters;
-		double cgAccuracy , lowResIterMultiplier;
-		// What to output
-		bool verbose , showResidual;
-
-		SolverInfo( void ) : cgDepth(0) , iters(1), cgAccuracy(0) , lowResIterMultiplier(0) , verbose(false) , showResidual(false) { ; }
-	};
-	template< int FEMDegree , BoundaryType BType , class FEMSystemFunctor , bool HasGradients >
-	DenseNodeData< Real , FEMDegree > solveSystem( const FEMSystemFunctor& F , InterpolationInfo< HasGradients >* iData , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxSolveDepth , const SolverInfo& solverInfo );
-
-	template< int FEMDegree , BoundaryType BType , int WeightDegree , int ColorDegree , class Vertex >
-	void getMCIsoSurface( const DensityEstimator< WeightDegree >* densityWeights , const SparseNodeData< ProjectiveData< Point3D< Real > , Real > , ColorDegree >* colorData , const DenseNodeData< Real , FEMDegree >& solution , Real isoValue , CoredMeshData< Vertex >& mesh , bool nonLinearFit=true , bool addBarycenter=false , bool polygonMesh=false );
-
-
-	const TreeOctNode& tree( void ) const{ return *_tree; }
-	size_t leaves( void ) const { return _tree->leaves(); }
-	size_t nodes( void ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( IsActiveNode( n ) ) count++ ; return count; }
-	size_t ghostNodes( void ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( !IsActiveNode( n ) ) count++ ; return count; }
-	inline size_t validSpaceNodes( void ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( isValidSpaceNode( n ) ) count++ ;  return count; }
-	inline size_t validSpaceNodes( LocalDepth d ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( _localDepth(n)==d && isValidSpaceNode( n ) ) count++ ; return count; }
-	template< int Degree , BoundaryType BType > size_t validFEMNodes( void ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( isValidFEMNode< Degree , BType >( n ) ) count++ ;  return count; }
-	template< int Degree , BoundaryType BType > size_t validFEMNodes( LocalDepth d ) const { int count = 0 ; for( const TreeOctNode* n=_tree->nextNode() ; n ; n=_tree->nextNode( n ) ) if( _localDepth(n)==d && isValidFEMNode< Degree , BType >( n ) ) count++ ; return count; }
-	LocalDepth depth( void ) const { return _localMaxDepth( _tree ); }
-	void resetNodeIndices( void ){ _NodeCount = 0 ; for( TreeOctNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) _NodeInitializer( *node ) , node->nodeData.flags=0; }
-
-protected:
-	template< class D > static bool _IsZero( const D& d );
-	template< class D > static Real _Dot( const D& d1 , const D& d2 );
-	template< int FEMDegree , BoundaryType FEMBType , int CDegree , BoundaryType CBType , class FEMConstraintFunctor , class Coefficients , class D , class _D >
-	void _addFEMConstraints( const FEMConstraintFunctor& F , const Coefficients& coefficients , DenseNodeData< Real , FEMDegree >& constraints , LocalDepth maxDepth );
-	template< int FEMDegree1 , BoundaryType FEMBType1 , int FEMDegree2 , BoundaryType FEMBType2 , class DotFunctor , bool HasGradients , class Coefficients1 , class Coefficients2 >
-	double _dot( const DotFunctor& F , const InterpolationInfo< HasGradients >* iInfo , const Coefficients1& coefficients1 , const Coefficients2& coefficients2 ) const;
-};
-template< class Real > int Octree< Real >::_NodeCount = 0;
-
-
-template< class Real > void Reset( void ){ Octree< Real >::ResetNodeCount(); }
-
-
-#include "MultiGridOctreeData.inl"
-#include "MultiGridOctreeData.SortedTreeNodes.inl"
-#include "MultiGridOctreeData.WeightedSamples.inl"
-#include "MultiGridOctreeData.System.inl"
-#include "MultiGridOctreeData.IsoSurface.inl"
-#include "MultiGridOctreeData.Evaluation.inl"
-#endif // MULTI_GRID_OCTREE_DATA_INCLUDED
diff --git a/Src/MultiGridOctreeData.inl b/Src/MultiGridOctreeData.inl
deleted file mode 100644
index 72195b9..0000000
--- a/Src/MultiGridOctreeData.inl
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-#ifdef FAST_SET_UP
-#include <functional>
-#endif // FAST_SET_UP
-#include <cmath>
-#include "PointStream.h"
-
-#define MEMORY_ALLOCATOR_BLOCK_SIZE 1<<12
-//#define MEMORY_ALLOCATOR_BLOCK_SIZE 0
-
-const double MATRIX_ENTRY_EPSILON = 0;
-const double EPSILON              = 1e-6;
-const double ROUND_EPS            = 1e-5;
-
-//////////////////
-// TreeNodeData //
-//////////////////
-TreeNodeData::TreeNodeData( void ){ flags = 0; }
-TreeNodeData::~TreeNodeData( void ) { }
-
-
-////////////
-// Octree //
-////////////
-template< class Real >
-double Octree< Real >::memoryUsage( void )
-{
-	double mem = double( MemoryInfo::Usage() ) / (1<<20);
-	_maxMemoryUsage = std::max< double >( mem , _maxMemoryUsage );
-	_localMemoryUsage = std::max< double >( mem , _localMemoryUsage );
-	return mem;
-}
-
-template< class Real > Octree< Real >::Octree( void ) : threads(1) , _maxMemoryUsage(0) , _localMemoryUsage(0)
-{
-	_tree = TreeOctNode::NewBrood( _NodeInitializer );
-	_tree->initChildren( _NodeInitializer ) , _spaceRoot = _tree->children;
-	_depthOffset = 1;
-}
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-void Octree< Real >::functionIndex( const TreeOctNode* node , int idx[3] ) const
-{
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-	for( int dd=0 ; dd<DIMENSION ; dd++ ) idx[dd] = BSplineData< FEMDegree , BType >::FunctionIndex( d , off[dd] );
-}
-
-template< class Real >
-OctNode< TreeNodeData >* Octree< Real >::leaf( Point3D< Real > p )
-{
-	if( !_InBounds( p ) ) return NULL;
-	Point3D< Real > center = Point3D< Real >( Real(0.5) , Real(0.5) , Real(0.5) );
-	Real width = Real(1.0);
-	TreeOctNode* node = _spaceRoot;
-	while( node->children )
-	{
-		int cIndex = TreeOctNode::CornerIndex( center , p );
-		node = node->children + cIndex;
-		width /= 2;
-		if( cIndex&1 ) center[0] += width/2;
-		else           center[0] -= width/2;
-		if( cIndex&2 ) center[1] += width/2;
-		else           center[1] -= width/2;
-		if( cIndex&4 ) center[2] += width/2;
-		else           center[2] -= width/2;
-	}
-	return node;
-}
-template< class Real >
-const OctNode< TreeNodeData >* Octree< Real >::leaf( Point3D< Real > p ) const
-{
-	if( !_InBounds( p ) ) return NULL;
-	Point3D< Real > center = Point3D< Real >( Real(0.5) , Real(0.5) , Real(0.5) );
-	Real width = Real(1.0);
-	TreeOctNode* node = _spaceRoot;
-	while( node->children )
-	{
-		int cIndex = TreeOctNode::CornerIndex( center , p );
-		node = node->children + cIndex;
-		width /= 2;
-		if( cIndex&1 ) center[0] += width/2;
-		else           center[0] -= width/2;
-		if( cIndex&2 ) center[1] += width/2;
-		else           center[1] -= width/2;
-		if( cIndex&4 ) center[2] += width/2;
-		else           center[2] -= width/2;
-	}
-	return node;
-}
-template< class Real > bool Octree< Real >::_InBounds( Point3D< Real > p ){ return p[0]>=Real(0.) && p[0]<=Real(1.0) && p[1]>=Real(0.) && p[1]<=Real(1.0) && p[2]>=Real(0.) && p[2]<=Real(1.0); }
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-bool Octree< Real >::isValidFEMNode( const TreeOctNode* node ) const
-{
-	if( GetGhostFlag( node ) ) return false;
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-	if( d<0 ) return false;
-	return !BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[0] ) && !BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[1] ) && !BSplineEvaluationData< FEMDegree , BType >::OutOfBounds( d , off[2] );
-}
-template< class Real >
-bool Octree< Real >::isValidSpaceNode( const TreeOctNode* node ) const
-{
-	if( !node ) return false;
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-	if( d<0 ) return false;
-	int res = 1<<d;
-	return off[0]>=0 && off[0]<res && off[1]>=0 && off[1]<res && off[2]>=0 && off[2]<res;
-}
-template< class Real >
-template< int Degree , BoundaryType BType >
-void Octree< Real >::_setFullDepth( TreeOctNode* node , LocalDepth depth ) const
-{
-	bool refine = false;
-	LocalDepth d ; LocalOffset off;
-	_localDepthAndOffset( node , d , off );
-	if( d<depth )
-		if( d<0 ) refine = true;
-		else if( BType==BOUNDARY_FREE && !_outOfBounds< Degree , BType >( node ) ) refine = true;
-		else if( !BSplineSupportSizes< Degree >::OutOfBounds( d , off[0] ) && !BSplineSupportSizes< Degree >::OutOfBounds( d , off[1] ) && !BSplineSupportSizes< Degree >::OutOfBounds( d , off[2] ) ) refine = true;
-	if( refine )
-	{
-		if( !node->children ) node->initChildren( _NodeInitializer );
-		for( int c=0 ; c<Cube::CORNERS ; c++ ) _setFullDepth< Degree , BType >( node->children+c , depth );
-	}
-}
-template< class Real >
-template< int Degree , BoundaryType BType >
-void Octree< Real >::_setFullDepth( LocalDepth depth )
-{
-	if( !_tree->children ) _tree->initChildren( _NodeInitializer );
-	for( int c=0 ; c<Cube::CORNERS ; c++ ) _setFullDepth< Degree , BType >( _tree->children+c , depth );
-}
-
-template< class Real , bool HasGradients >
-struct _PointDataAccumulator_
-{
-#if POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , HasGradients >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Point3D< Real > center , Real width , Real weight );
-#else // !POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , HasGradients >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Real weight );
-#endif // POINT_DATA_RES
-};
-template< class Real >
-struct _PointDataAccumulator_< Real , false >
-{
-#if POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , false >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Point3D< Real > center , Real width , Real weight ){ pData.addPoint( SinglePointData< Real , false >( position , value , weight ) , center , width ); }
-#else // !POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , false >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Real weight ){ pData.position += position , pData.value += value , pData.weight += weight; }
-#endif // POINT_DATA_RES
-};
-template< class Real >
-struct _PointDataAccumulator_< Real , true >
-{
-#if POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , true >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Point3D< Real > center , Real width , Real weight ){ pData.addPoint( SinglePointData< Real , true >( position , value , gradient , weight ) , center , width ); }
-#else // !POINT_DATA_RES
-	static inline void _AddToPointData_( PointData< Real , true >& pData , Point3D< Real > position , Real value , Point3D< Real > gradient , Real weight ){ pData.position += position , pData.value += value , pData.gradient += gradient , pData.weight += weight; }
-#endif // POINT_DATA_RES
-};
-
-template< class Real >
-void Octree< Real >::_init( TreeOctNode* node , LocalDepth maxDepth , bool (*Refine)( LocalDepth , LocalOffset ) )
-{
-	if( _localDepth( node )<maxDepth )
-	{
-		LocalDepth d ; LocalOffset off;
-		_localDepthAndOffset( node , d , off );
-		if( Refine( d , off ) )
-		{
-			node->initChildren( _NodeInitializer );
-			for( int c=0 ; c<Cube::CORNERS ; c++ ) _init( node->children + c , maxDepth , Refine );
-		}
-	}
-}
-template< class Real > void Octree< Real >::init( LocalDepth maxDepth , bool (*Refine)( LocalDepth , LocalOffset ) ){ _init( _spaceRoot , maxDepth , Refine ); }
-template< class Real >
-template< class Data >
-int Octree< Real >::init( OrientedPointStream< Real >& pointStream , LocalDepth maxDepth , bool useConfidence , std::vector< PointSample >& samples , std::vector< ProjectiveData< Data , Real > >* sampleData )
-{
-	OrientedPointStreamWithData< Real , Data >& pointStreamWithData = ( OrientedPointStreamWithData< Real , Data >& )pointStream;
-
-	// Add the point data
-	int outOfBoundPoints = 0 , zeroLengthNormals = 0 , undefinedNormals = 0 , pointCount = 0;
-	{
-		std::vector< int > nodeToIndexMap;
-		Point3D< Real > p , n;
-		OrientedPoint3D< Real > _p;
-		Data _d;
-		while( ( sampleData ? pointStreamWithData.nextPoint( _p , _d ) : pointStream.nextPoint( _p ) ) )
-		{
-			p = Point3D< Real >(_p.p) , n = Point3D< Real >(_p.n);
-			Real len = (Real)Length( n );
-			if( !_InBounds(p) ){ outOfBoundPoints++ ; continue; }
-			if( !len ){ zeroLengthNormals++ ; continue; }
-			if( len!=len ){ undefinedNormals++ ; continue; }
-			n /= len;
-			Point3D< Real > center = Point3D< Real >( Real(0.5) , Real(0.5) , Real(0.5) );
-			Real width = Real(1.0);
-			TreeOctNode* temp = _spaceRoot;
-			LocalDepth depth = _localDepth( temp );
-			while( depth<maxDepth )
-			{
-				if( !temp->children ) temp->initChildren( _NodeInitializer );
-				int cIndex = TreeOctNode::CornerIndex( center , p );
-				temp = temp->children + cIndex;
-				width /= 2;
-				if( cIndex&1 ) center[0] += width/2;
-				else           center[0] -= width/2;
-				if( cIndex&2 ) center[1] += width/2;
-				else           center[1] -= width/2;
-				if( cIndex&4 ) center[2] += width/2;
-				else           center[2] -= width/2;
-				depth++;
-			}
-			Real weight = (Real)( useConfidence ? len : 1. );
-			int nodeIndex = temp->nodeData.nodeIndex;
-			if( nodeIndex>=nodeToIndexMap.size() ) nodeToIndexMap.resize( nodeIndex+1 , -1 );
-			int idx = nodeToIndexMap[ nodeIndex ];
-			if( idx==-1 )
-			{
-				idx = (int)samples.size();
-				nodeToIndexMap[ nodeIndex ] = idx;
-				samples.resize( idx+1 ) , samples[idx].node = temp;
-				if( sampleData ) sampleData->resize( idx+1 );
-			}
-			samples[idx].sample += ProjectiveData< OrientedPoint3D< Real > , Real >( OrientedPoint3D< Real >( p * weight , n * weight ) , weight );
-			if( sampleData ) (*sampleData)[ idx ] += ProjectiveData< Data , Real >( _d * weight , weight );
-			pointCount++;
-		}
-		pointStream.reset();
-	}
-	if( outOfBoundPoints  ) fprintf( stderr , "[WARNING] Found out-of-bound points: %d\n" , outOfBoundPoints );
-	if( zeroLengthNormals ) fprintf( stderr , "[WARNING] Found zero-length normals: %d\n" , zeroLengthNormals );
-	if( undefinedNormals  ) fprintf( stderr , "[WARNING] Found undefined normals: %d\n" , undefinedNormals );
-
-	memoryUsage();
-	return pointCount;
-}
-template< class Real >
-template< int DensityDegree >
-typename Octree< Real >::template DensityEstimator< DensityDegree >* Octree< Real >::setDensityEstimator( const std::vector< PointSample >& samples , LocalDepth splatDepth , Real samplesPerNode )
-{
-	LocalDepth maxDepth = _localMaxDepth( _tree );
-	splatDepth = std::max< LocalDepth >( 0 , std::min< LocalDepth >( splatDepth , maxDepth ) );
-	DensityEstimator< DensityDegree >* _density = new DensityEstimator< DensityDegree >( splatDepth );
-	DensityEstimator< DensityDegree >& density = *_density;
-	PointSupportKey< DensityDegree > densityKey;
-	densityKey.set( _localToGlobal( splatDepth ) );
-
-#ifdef FAST_SET_UP
-	std::vector< int > sampleMap( NodeCount() , -1 );
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<samples.size() ; i++ ) if( samples[i].sample.weight>0 ) sampleMap[ samples[i].node->nodeData.nodeIndex ] = i;
-	std::function< ProjectiveData< OrientedPoint3D< Real > , Real > ( TreeOctNode* ) > SetDensity = [&] ( TreeOctNode* node )
-	{
-		ProjectiveData< OrientedPoint3D< Real > , Real > sample;
-		LocalDepth d = _localDepth( node );
-		int idx = node->nodeData.nodeIndex;
-		if( node->children )
-			for( int c=0 ; c<Cube::CORNERS ; c++ )
-			{
-				ProjectiveData< OrientedPoint3D< Real > , Real > s = SetDensity( node->children + c );
-				if( d<=splatDepth && s.weight>0 )
-				{
-					Point3D< Real > p = s.data.p / s.weight;
-					Real w = s.weight / samplesPerNode;
-					_addWeightContribution( density , node , p , densityKey , w );
-				}
-				sample += s;
-			}
-		else if( idx<sampleMap.size() && sampleMap[idx]!=-1 )
-		{
-			sample = samples[ sampleMap[ idx ] ].sample;
-			if( d<=splatDepth && sample.weight>0 )
-			{
-				Point3D< Real > p = sample.data.p / sample.weight;
-				Real w = sample.weight / samplesPerNode;
-				_addWeightContribution( density , node , p , densityKey , w );
-			}
-		}
-		return sample;
-	};
-	SetDensity( _spaceRoot );
-#else // !FAST_SET_UP
-	for( int i=0 ; i<samples.size() ; i++ )
-	{
-		const TreeOctNode* node = samples[i].node;
-		const ProjectiveData< OrientedPoint3D< Real > , Real >& sample = samples[i].sample;
-		if( sample.weight>0 )
-		{
-			Point3D< Real > p = sample.data.p / sample.weight;
-			Real w = sample.weight / samplesPerNode;
-			for( TreeOctNode* _node=(TreeOctNode*)node ; _node ; _node=_node->parent ) if( _localDepth( _node )<=splatDepth ) _addWeightContribution( density , _node , p , densityKey , w );
-		}
-	}
-#endif // FAST_SET_UP
-
-	memoryUsage();
-	return _density;
-}
-template< class Real >
-template< int NormalDegree , int DensityDegree >
-SparseNodeData< Point3D< Real > , NormalDegree > Octree< Real >::setNormalField( const std::vector< PointSample >& samples , const DensityEstimator< DensityDegree >& density , Real& pointWeightSum , bool forceNeumann )
-{
-	LocalDepth maxDepth = _localMaxDepth( _tree );
-	PointSupportKey< DensityDegree > densityKey;
-	PointSupportKey< NormalDegree > normalKey;
-	densityKey.set( _localToGlobal( maxDepth ) ) , normalKey.set( _localToGlobal( maxDepth ) );
-
-	Real weightSum = 0;
-	pointWeightSum = 0;
-	SparseNodeData< Point3D< Real > , NormalDegree > normalField;
-	for( int i=0 ; i<samples.size() ; i++ )
-	{
-		const ProjectiveData< OrientedPoint3D< Real > , Real >& sample = samples[i].sample;
-		if( sample.weight>0 )
-		{
-			Point3D< Real > p = sample.data.p / sample.weight , n = sample.data.n;
-			weightSum += sample.weight;
-			if( !_InBounds(p) ){ fprintf( stderr , "[WARNING] Octree:setNormalField: Point sample is out of bounds\n" ) ; continue; }
-			pointWeightSum += _splatPointData< true >( density , p , n , normalField , densityKey , normalKey , 0 , maxDepth , 3 );
-		}
-	}
-	pointWeightSum /= weightSum;
-	memoryUsage();
-
-	return normalField;
-}
-template< class Real >
-template< int DataDegree , bool CreateNodes , int DensityDegree , class Data >
-SparseNodeData< ProjectiveData< Data , Real > , DataDegree > Octree< Real >::setDataField( const std::vector< PointSample >& samples , std::vector< ProjectiveData< Data , Real > >& sampleData , const DensityEstimator< DensityDegree >* density )
-{
-	LocalDepth maxDepth = _localMaxDepth( _tree );
-	PointSupportKey< DensityDegree > densityKey;
-	PointSupportKey< DataDegree > dataKey;
-	densityKey.set( _localToGlobal( maxDepth ) ) , dataKey.set( _localToGlobal( maxDepth ) );
-
-	SparseNodeData< ProjectiveData< Data , Real > , DataDegree > dataField;
-	for( int i=0 ; i<samples.size() ; i++ )
-	{
-		const ProjectiveData< OrientedPoint3D< Real > , Real >& sample = samples[i].sample;
-		const ProjectiveData< Data , Real >& data = sampleData[i];
-		Point3D< Real > p = sample.weight==0 ? sample.data.p : sample.data.p / sample.weight;
-		if( !_InBounds(p) ){ fprintf( stderr , "[WARNING] Point is out of bounds: %f %f %f <- %f %f %f [%f]\n" , p[0] , p[1] , p[2] , sample.data.p[0] , sample.data.p[1] , sample.data.p[2] , sample.weight ) ; continue; }
-		_multiSplatPointData< CreateNodes >( density , (TreeOctNode*)samples[i].node , p , data , dataField , densityKey , dataKey , 2 );
-	}
-	memoryUsage();
-	return dataField;
-}
-template< class Real >
-template< int MaxDegree , int FEMDegree , BoundaryType FEMBType , class HasDataFunctor >
-void Octree< Real >::inalizeForBroodedMultigrid( LocalDepth fullDepth , const HasDataFunctor& F , std::vector< int >* map )
-{
-	if( FEMDegree>MaxDegree ) fprintf( stderr , "[ERROR] MaxDegree must be at least as large as the FEM degree: %d <= %d\n" , FEMDegree , MaxDegree );
-	while( _localInset( 0 ) + BSplineEvaluationData< MaxDegree , BOUNDARY_FREE >::Begin( 0 )<0 || _localInset( 0 ) + BSplineEvaluationData< MaxDegree , BOUNDARY_FREE >::End( 0 )>(1<<_depthOffset) )
-	{
-		//                       +-+-+-+-+-+-+-+-+
-		//                       | | | | | | | | |
-		//                       +-+-+-+-+-+-+-+-+
-		//                       | | | | | | | | |
-		//          +-+-+-+-+    +-+-+-+-+-+-+-+-+
-		//          | | | | |    | | | | | | | | |
-		// +-+-+    +-+-+-+-+    +-+-+-+-+-+-+-+-+
-		// |*| |    | | | | |    | | | | | | | | |
-		// +-o-+ -> +-+-o-+-+ -> +-+-+-+-o-+-+-+-+
-		// | | |    | | |*| |    | | | | |*| | | |
-		// +-+-+    +-+-+-+-+    +-+-+-+-+-+-+-+-+
-		//          | | | | |    | | | | | | | | |
-		//          +-+-+-+-+    +-+-+-+-+-+-+-+-+
-		//                       | | | | | | | | |
-		//                       +-+-+-+-+-+-+-+-+
-		//                       | | | | | | | | |
-		//                       +-+-+-+-+-+-+-+-+
-
-		TreeOctNode* newSpaceRootParent = TreeOctNode::NewBrood( _NodeInitializer );
-		TreeOctNode* oldSpaceRootParent = _spaceRoot->parent;
-		int corner = _depthOffset<=1 ? Cube::CORNERS-1 : 0;
-		newSpaceRootParent[corner].children = _spaceRoot;
-		oldSpaceRootParent->children = newSpaceRootParent;
-		for( int c=0 ; c<Cube::CORNERS ; c++ ) _spaceRoot[c].parent = newSpaceRootParent + corner , newSpaceRootParent[c].parent = oldSpaceRootParent;
-		_depthOffset++;
-	}
-	int d=0 , off[] = { 0 , 0 , 0 };
-	TreeOctNode::ResetDepthAndOffset( _tree , d , off );
-	_maxDepth = _localMaxDepth( _tree );
-
-	// Make the low-resolution part of the tree be complete
-	_fullDepth = std::max< LocalDepth >( 0 , std::min< LocalDepth >( _maxDepth , fullDepth ) );
-	_setFullDepth< MaxDegree , BOUNDARY_FREE >( _fullDepth );
-	// Clear all the flags and make everything that is not low-res a ghost node
-	for( TreeOctNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) node->nodeData.flags = 0 , SetGhostFlag( node , _localDepth( node )>_fullDepth );
-
-	// Set the ghost nodes for the high-res part of the tree
-	_clipTree( F );
-
-	const int OverlapRadius = -BSplineOverlapSizes< MaxDegree , MaxDegree >::OverlapStart;
-	typename TreeOctNode::NeighborKey< OverlapRadius , OverlapRadius > neighborKey;
-	neighborKey.set( _localToGlobal( _maxDepth-1 ) );
-
-	for( LocalDepth d=_maxDepth-1 ; d>=0 ; d-- )
-		for( TreeOctNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) if( _localDepth( node )==d && IsActiveNode( node->children ) )
-		{
-			neighborKey.template getNeighbors< true >( node , _NodeInitializer );
-			for( int i=0 ; i<neighborKey.Width ; i++ ) for( int j=0 ; j<neighborKey.Width ; j++ ) for( int k=0 ; k<neighborKey.Width ; k++ ) SetGhostFlag( neighborKey.neighbors[ _localToGlobal(d) ].neighbors[i][j][k] , false );
-		}
-
-	_sNodes.set( *_tree , map );
-	_setValidityFlags< FEMDegree , FEMBType >();
-	for( TreeOctNode* node=_tree->nextNode() ; node ; node=_tree->nextNode( node ) ) if( !IsActiveNode( node ) ) node->nodeData.nodeIndex = -1;
-	memoryUsage();
-}
-
-
-template< class Real >
-template< int FEMDegree , BoundaryType BType >
-void Octree< Real >::_setValidityFlags( void )
-{
-	for( int i=0 ; i<_sNodes.size() ; i++ )
-	{
-		const unsigned char MASK = ~( TreeNodeData::SPACE_FLAG | TreeNodeData::FEM_FLAG );
-		_sNodes.treeNodes[i]->nodeData.flags &= MASK;
-		if( isValidSpaceNode( _sNodes.treeNodes[i] ) ) _sNodes.treeNodes[i]->nodeData.flags |= TreeNodeData::SPACE_FLAG;
-		if( isValidFEMNode< FEMDegree , BType >( _sNodes.treeNodes[i] ) ) _sNodes.treeNodes[i]->nodeData.flags |= TreeNodeData::FEM_FLAG;
-	}
-}
-
-// Trim off the branches of the tree (finer than _fullDepth) that don't contain data
-template< class Real >
-template< class HasDataFunctor >
-void Octree< Real >::_clipTree( const HasDataFunctor& f )
-{
-	// Because we are doing things in a brooded fashion, if any of the children has data then the whole brood is active
-	for( TreeOctNode* temp=_tree->nextNode() ; temp ; temp=_tree->nextNode(temp) ) if( temp->children && _localDepth( temp )>=_fullDepth )
-	{
-		bool hasData = false;
-		for( int c=0 ; c<Cube::CORNERS && !hasData ; c++ ) hasData |= f( temp->children + c );
-		for( int c=0 ; c<Cube::CORNERS ; c++ ) SetGhostFlag( temp->children+c , !hasData );
-	}
-}
-
-template< class Real >
-template< bool HasGradients >
-bool Octree< Real >::_setInterpolationInfoFromChildren( TreeOctNode* node , SparseNodeData< PointData< Real , HasGradients > , 0 >& interpolationInfo ) const
-{
-	if( IsActiveNode( node->children ) )
-	{
-		bool hasChildData = false;
-		PointData< Real , HasGradients > pData;
-#if POINT_DATA_RES
-		Point3D< Real > center;
-		Real width;
-		_centerAndWidth( node , center , width );
-		for( int c=0 ; c<Cube::CORNERS ; c++ )
-			if( _setInterpolationInfoFromChildren( node->children + c , interpolationInfo ) )
-			{
-				const PointData< Real , HasGradients >& _pData = interpolationInfo[ node->children + c ];
-				for( int cc=0 ; cc<PointData< Real , HasGradients >::SAMPLES ; cc++ )
-				{
-					int x[3];
-					PointData< Real , HasGradients >::SetIndices( _pData[cc].position / _pData[cc].weight , center , width , x );
-					pData[ x[0] + x[1]*PointData< Real , HasGradients >::RES + x[2]*PointData< Real , HasGradients >::RES*PointData< Real , HasGradients >::RES ] += _pData[cc];
-				}
-				hasChildData = true;
-			}
-#else // !POINT_DATA_RES
-		for( int c=0 ; c<Cube::CORNERS ; c++ )
-			if( _setInterpolationInfoFromChildren( node->children + c , interpolationInfo ) )
-			{
-				pData += interpolationInfo[ node->children + c ];
-				hasChildData = true;
-			}
-#endif // POINT_DATA_RES
-		if( hasChildData && IsActiveNode( node ) ) interpolationInfo[ node ] += pData;
-		return hasChildData;
-	}
-	else return interpolationInfo( node )!=NULL;
-}
-template< class Real >
-template< bool HasGradients >
-SparseNodeData< PointData< Real , HasGradients > , 0 > Octree< Real >::_densifyInterpolationInfo( const std::vector< PointSample >& samples , Real pointValue , int adaptiveExponent ) const
-{
-	SparseNodeData< PointData< Real , HasGradients > , 0 > iInfo;
-	for( int i=0 ; i<samples.size() ; i++ )
-	{
-		const TreeOctNode* node = samples[i].node;
-		const ProjectiveData< OrientedPoint3D< Real > , Real >& pData = samples[i].sample;
-		while( !IsActiveNode( node ) ) node = node->parent;
-		if( pData.weight )
-		{
-#if POINT_DATA_RES
-			Point3D< Real > center;
-			Real width;
-			_centerAndWidth( node , center , width );
-			_PointDataAccumulator_< Real , HasGradients >::_AddToPointData_( iInfo[node] , pData.data.p , pointValue * pData.weight , pData.data.n , center , width , pData.weight );
-#else // !POINT_DATA_RES
-			_PointDataAccumulator_< Real , HasGradients >::_AddToPointData_( iInfo[node] , pData.data.p , pointValue * pData.weight , pData.data.n , pData.weight );
-#endif // POINT_DATA_RES
-		}
-	}
-
-	// Set the interior values
-	_setInterpolationInfoFromChildren( _spaceRoot, iInfo );
-#pragma omp parallel for
-	for( int i=0 ; i<(int)iInfo.size() ; i++ )
-#if POINT_DATA_RES
-		for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ )
-		{
-			Real w = iInfo[i][c].weight;
-			iInfo[i][c] /= w ; iInfo[i][c].weight = w;
-		}
-#else // !POINT_DATA_RES
-	{
-		Real w = iInfo[i].weight;
-		iInfo[i] /= w ; iInfo[i].weight = w;
-	}
-#endif // POINT_DATA_RES
-	LocalDepth maxDepth = _localMaxDepth( _tree );
-
-	// Set the average position and scale the weights
-	for( const TreeOctNode* node=_tree->nextNode() ; node ; node=_tree->nextNode(node) ) if( IsActiveNode( node ) )
-	{
-		PointData< Real , HasGradients >* pData = iInfo( node );
-		if( pData )
-		{
-			int e = _localDepth( node ) * adaptiveExponent - ( maxDepth ) * (adaptiveExponent-1);
-#if POINT_DATA_RES
-			for( int c=0 ; c<PointData< Real , HasGradients >::SAMPLES ; c++ ) if( (*pData)[c].weight )
-			{
-				if( e<0 ) (*pData)[c].weight /= Real( 1<<(-e) );
-				else      (*pData)[c].weight *= Real( 1<<  e  );
-			}
-#else // !POINT_DATA_RES
-			if( e<0 ) pData->weight /= Real( 1<<(-e) );
-			else      pData->weight *= Real( 1<<  e  );
-#endif // POINT_DATA_RES
-		}
-	}
-	return iInfo;
-}
-////////////////
-// VertexData //
-////////////////
-long long VertexData::CenterIndex( const TreeOctNode* node , int maxDepth )
-{
-	int idx[DIMENSION];
-	return CenterIndex(node,maxDepth,idx);
-}
-long long VertexData::CenterIndex(const TreeOctNode* node,int maxDepth,int idx[DIMENSION])
-{
-	int d , o[3];
-	node->depthAndOffset( d , o );
-	for( int i=0 ; i<DIMENSION ; i++ ) idx[i] = BinaryNode::CornerIndex( maxDepth+1 , d+1 , o[i]<<1 , 1 );
-	return (long long)(idx[0]) | (long long)(idx[1])<<VERTEX_COORDINATE_SHIFT | (long long)(idx[2])<<(2*VERTEX_COORDINATE_SHIFT);
-}
-long long VertexData::CenterIndex( int depth , const int offSet[DIMENSION] , int maxDepth , int idx[DIMENSION] )
-{
-	for(int i=0;i<DIMENSION;i++) idx[i]=BinaryNode::CornerIndex( maxDepth+1 , depth+1 , offSet[i]<<1 , 1 );
-	return (long long)(idx[0]) | (long long)(idx[1])<<VERTEX_COORDINATE_SHIFT | (long long)(idx[2])<<(2*VERTEX_COORDINATE_SHIFT);
-}
-long long VertexData::CornerIndex(const TreeOctNode* node,int cIndex,int maxDepth)
-{
-	int idx[DIMENSION];
-	return CornerIndex(node,cIndex,maxDepth,idx);
-}
-long long VertexData::CornerIndex( const TreeOctNode* node , int cIndex , int maxDepth , int idx[DIMENSION] )
-{
-	int x[DIMENSION];
-	Cube::FactorCornerIndex( cIndex , x[0] , x[1] , x[2] );
-	int d , o[3];
-	node->depthAndOffset( d , o );
-	for( int i=0 ; i<DIMENSION ; i++ ) idx[i] = BinaryNode::CornerIndex( maxDepth+1 , d , o[i] , x[i] );
-	return CornerIndexKey( idx );
-}
-long long VertexData::CornerIndex( int depth , const int offSet[DIMENSION] , int cIndex , int maxDepth , int idx[DIMENSION] )
-{
-	int x[DIMENSION];
-	Cube::FactorCornerIndex( cIndex , x[0] , x[1] , x[2] );
-	for( int i=0 ; i<DIMENSION ; i++ ) idx[i] = BinaryNode::CornerIndex( maxDepth+1 , depth , offSet[i] , x[i] );
-	return CornerIndexKey( idx );
-}
-long long VertexData::CornerIndexKey( const int idx[DIMENSION] )
-{
-	return (long long)(idx[0]) | (long long)(idx[1])<<VERTEX_COORDINATE_SHIFT | (long long)(idx[2])<<(2*VERTEX_COORDINATE_SHIFT);
-}
-long long VertexData::FaceIndex(const TreeOctNode* node,int fIndex,int maxDepth){
-	int idx[DIMENSION];
-	return FaceIndex(node,fIndex,maxDepth,idx);
-}
-long long VertexData::FaceIndex(const TreeOctNode* node,int fIndex,int maxDepth,int idx[DIMENSION])
-{
-	int dir,offset;
-	Cube::FactorFaceIndex(fIndex,dir,offset);
-	int d,o[3];
-	node->depthAndOffset(d,o);
-	for(int i=0;i<DIMENSION;i++){idx[i]=BinaryNode::CornerIndex(maxDepth+1,d+1,o[i]<<1,1);}
-	idx[dir]=BinaryNode::CornerIndex(maxDepth+1,d,o[dir],offset);
-	return (long long)(idx[0]) | (long long)(idx[1])<<VERTEX_COORDINATE_SHIFT | (long long)(idx[2])<<(2*VERTEX_COORDINATE_SHIFT);
-}
-long long VertexData::EdgeIndex( const TreeOctNode* node , int eIndex , int maxDepth ){ int idx[DIMENSION] ; return EdgeIndex( node , eIndex , maxDepth , idx ); }
-long long VertexData::EdgeIndex( const TreeOctNode* node , int eIndex , int maxDepth , int idx[DIMENSION] )
-{
-	int o , i1 , i2;
-	int d , off[3];
-	node->depthAndOffset( d ,off );
-	Cube::FactorEdgeIndex( eIndex , o , i1 , i2 );
-	for( int i=0 ; i<DIMENSION ; i++ ) idx[i] = BinaryNode::CornerIndex( maxDepth+1 , d+1 , off[i]<<1 , 1 );
-	switch(o)
-	{
-		case 0:
-			idx[1] = BinaryNode::CornerIndex( maxDepth+1 , d , off[1] , i1 );
-			idx[2] = BinaryNode::CornerIndex( maxDepth+1 , d , off[2] , i2 );
-			break;
-		case 1:
-			idx[0] = BinaryNode::CornerIndex( maxDepth+1 , d , off[0] , i1 );
-			idx[2] = BinaryNode::CornerIndex( maxDepth+1 , d , off[2] , i2 );
-			break;
-		case 2:
-			idx[0] = BinaryNode::CornerIndex( maxDepth+1 , d , off[0] , i1 );
-			idx[1] = BinaryNode::CornerIndex( maxDepth+1 , d , off[1] , i2 );
-			break;
-	};
-	return (long long)(idx[0]) | (long long)(idx[1])<<VERTEX_COORDINATE_SHIFT | (long long)(idx[2])<<(2*VERTEX_COORDINATE_SHIFT);
-}
diff --git a/Src/MyMiscellany.h b/Src/MyMiscellany.h
new file mode 100644
index 0000000..96904cb
--- /dev/null
+++ b/Src/MyMiscellany.h
@@ -0,0 +1,531 @@
+/*
+Copyright (c) 2017, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+#ifndef MY_MISCELLANY_INCLUDED
+#define MY_MISCELLANY_INCLUDED
+
+#undef VERBOSE_MESSAGING
+
+//////////////////
+// OpenMP Stuff //
+//////////////////
+#ifdef _OPENMP
+#include <omp.h>
+#else // !_OPENMP
+inline int omp_get_num_procs  ( void ){ return 1; } // serial fallback: reports a single processor
+inline int omp_get_max_threads( void ){ return 1; } // serial fallback: reports a single thread
+inline int omp_get_thread_num ( void ){ return 0; } // serial fallback: the only thread has index 0
+inline void omp_set_num_threads( int ){}             // no-op: the requested thread count is ignored
+inline void omp_set_nested( int ){}                  // no-op
+struct omp_lock_t{};                                 // empty stand-in for the OpenMP lock type
+inline void omp_init_lock( omp_lock_t* ){}           // no-op: the stand-in lock holds no state
+inline void omp_set_lock( omp_lock_t* ){}            // no-op: never blocks
+inline void omp_unset_lock( omp_lock_t* ){}          // no-op
+inline void omp_destroy_lock( omp_lock_t* ){}        // no-op
+#endif // _OPENMP
+
+////////////////
+// Time Stuff //
+////////////////
+#include <string.h>
+#include <sys/timeb.h>
+#ifndef WIN32
+#include <sys/time.h>
+#endif // WIN32
+
+inline double Time( void ) // Returns the time reported by _ftime / gettimeofday, in seconds, as a double
+{
+#ifdef WIN32
+	struct _timeb now;
+	_ftime( &now );
+	return double( now.time ) + double( now.millitm ) / 1000.0; // whole seconds plus the millisecond part
+#else // WIN32
+	struct timeval now;
+	gettimeofday( &now , NULL );
+	return now.tv_sec + double( now.tv_usec ) / 1000000; // whole seconds plus the microsecond part
+#endif // WIN32
+}
+
+#include <cstdio>
+#include <ctime>
+#include <chrono>
+struct Timer // Stopwatch that records std::clock() and std::chrono::system_clock::now() when it is constructed
+{
+	Timer( void ) : _startCPUClock( std::clock() ) , _startWallClock( std::chrono::system_clock::now() ) {}
+	double cpuTime( void ) const { return double( std::clock() - _startCPUClock ) / CLOCKS_PER_SEC; } // processor time since construction, in seconds
+	double wallTime( void ) const { return std::chrono::duration< double >( std::chrono::system_clock::now() - _startWallClock ).count(); } // system-clock time since construction, in seconds
+protected:
+	std::clock_t _startCPUClock;                                          // std::clock() reading taken by the constructor
+	std::chrono::time_point< std::chrono::system_clock > _startWallClock; // system_clock reading taken by the constructor
+};
+
+///////////////
+// I/O Stuff //
+///////////////
+#if defined( _WIN32 ) || defined( _WIN64 )
+const char FileSeparator = '\\'; // a single backslash on Windows builds
+#else // !_WIN
+const char FileSeparator = '/';  // a forward slash on every other platform
+#endif // _WIN
+
+#ifndef SetTempDirectory // a definition supplied before this point takes precedence
+#if defined( _WIN32 ) || defined( _WIN64 )
+#define SetTempDirectory( tempDir , sz ) GetTempPath( (sz) , (tempDir) )
+#else // !_WIN32 && !_WIN64: copies $TMPDIR (when set) into tempDir, truncated to fit sz characters; tempDir is left untouched otherwise
+#define SetTempDirectory( tempDir , sz ) do{ const char *_tmpDir = std::getenv( "TMPDIR" ) ; if( _tmpDir ) snprintf( (tempDir) , (sz) , "%s" , _tmpDir ); }while( 0 )
+#endif // _WIN32 || _WIN64
+#endif // !SetTempDirectory
+
+#include <stdarg.h>
+#include <vector>
+#include <string>
+// Functor that reports printf-style messages: to stdout, to a file that is appended to, or to both
+struct MessageWriter
+{
+	char* outputFile;	// path of the file messages are appended to; NULL disables file output
+	bool echoSTDOUT;	// whether messages are also printed to stdout
+	// By default messages go to stdout only
+	MessageWriter( void ){ outputFile = NULL , echoSTDOUT = true; }
+
+	// Writes the formatted message to the output file (if set) and to stdout (if enabled)
+	void operator() ( const char* format , ... )
+	{
+		va_list args;
+		va_start( args , format );
+		_write( format , args );
+		va_end( args );
+	}
+
+	// Writes the message as above and appends a copy of it, without its trailing newline, to messages.
+	// The copy is allocated with new char[]; nothing in this struct releases it, so the caller is responsible for delete[].
+	// A message that does not fit in _MaxMessageSize characters is truncated instead of overflowing the buffer.
+	void operator() ( std::vector< char* >& messages  , const char* format , ... )
+	{
+		va_list args;
+		va_start( args , format );
+		_write( format , args );
+		va_end( args );
+
+		messages.push_back( new char[ _MaxMessageSize ] );
+		va_start( args , format );
+		_format( messages.back() , format , args );
+		va_end( args );
+	}
+
+	// Writes the message as above and appends a copy of it, without its trailing newline, to messages.
+	// A message that does not fit in _MaxMessageSize characters is truncated instead of overflowing the buffer.
+	void operator() ( std::vector< std::string >& messages  , const char* format , ... )
+	{
+		va_list args;
+		va_start( args , format );
+		_write( format , args );
+		va_end( args );
+
+		char message[ _MaxMessageSize ];
+		va_start( args , format );
+		_format( message , format , args );
+		va_end( args );
+		messages.push_back( std::string( message ) );
+	}
+protected:
+	// Capacity, in characters, of the buffer a stored message is formatted into
+	enum{ _MaxMessageSize = 1024 };
+
+	// Sends the formatted message to the output file (if set) and to stdout (if enabled)
+	void _write( const char* format , va_list args )
+	{
+		if( outputFile )
+		{
+			FILE* fp = fopen( outputFile , "a" );
+			if( fp ) // a file that cannot be opened is skipped rather than handing vfprintf a NULL stream
+			{
+				va_list fileArgs;
+				va_copy( fileArgs , args ); // args itself is still needed for the stdout pass below
+				vfprintf( fp , format , fileArgs );
+				va_end( fileArgs );
+				fclose( fp );
+			}
+		}
+		if( echoSTDOUT ) vprintf( format , args );
+	}
+
+	// Formats the message into str (which must hold _MaxMessageSize characters) and strips one trailing newline
+	static void _format( char* str , const char* format , va_list args )
+	{
+		if( vsnprintf( str , _MaxMessageSize , format , args )<0 ) str[0] = 0;
+		size_t length = strlen( str );
+		// The length check keeps an empty message from indexing str[-1]
+		if( length && str[length-1]=='\n' ) str[length-1] = 0;
+	}
+};
+
+/////////////////////////////////////
+// Exception, Warnings, and Errors //
+/////////////////////////////////////
+#include <exception>
+#include <string>
+namespace MKExceptions
+{
+#ifdef VERBOSE_MESSAGING
+	// Builds the message reported by Throw/Warn/ErrorOut. The returned buffer is allocated with new[] and must be released by the caller with delete[].
+	inline char *_MakeMessageString( const char *header , const char *fileName , int line , const char *functionName , const char *format , ... )
+	{
+		// Formatting is:
+		// <header> <filename> (Line <line>)
+		// <header size> <function name>
+		// <header size> <format message>
+		char lineBuffer[25];
+		sprintf( lineBuffer , "(Line %d)" , line );
+		const size_t headerSize = strlen(header)+1;
+		size_t _size , size=0;
+
+		// Line 1
+		size += headerSize;
+		size += strlen(fileName)+1;
+		size += strlen(lineBuffer)+1;
+
+		// Line 2
+		size += headerSize;
+		size += strlen(functionName)+1;
+
+		// Line 3
+		// [NOTE] A va_list is indeterminate once a v*printf call has consumed it, so it is started anew for each pass
+		va_list args;
+		va_start( args , format );
+		int messageSize = vsnprintf( NULL , 0 , format , args );
+		va_end( args );
+		size += headerSize;
+		if( messageSize>0 ) size += messageSize;
+
+		char *_buffer , *buffer = new char[ size+1 ];
+		_size = size , _buffer = buffer;
+
+		// Line 1
+		sprintf( _buffer , "%s " , header );
+		_buffer += headerSize , _size -= headerSize;
+
+		sprintf( _buffer , "%s " , fileName );
+		_buffer += strlen(fileName)+1 , _size -= strlen(fileName)+1;
+
+		sprintf( _buffer , "%s\n" , lineBuffer );
+		_buffer += strlen(lineBuffer)+1 , _size -= strlen(lineBuffer)+1;
+
+		// Line 2
+		for( size_t i=0 ; i<headerSize ; i++ ) _buffer[i] = ' ';
+		_buffer += headerSize , _size -= headerSize;
+
+		sprintf( _buffer , "%s\n" , functionName );
+		_buffer += strlen(functionName)+1 , _size -= strlen(functionName)+1;
+
+		// Line 3
+		for( size_t i=0 ; i<headerSize ; i++ ) _buffer[i] = ' ';
+		_buffer += headerSize , _size -= headerSize;
+
+		_buffer[0] = 0; // keeps the string terminated even if the formatting below fails
+		va_start( args , format );
+		vsnprintf( _buffer , _size+1 , format , args );
+		va_end( args );
+		return buffer;
+	}
+
+	struct Exception : public std::exception // carries a message naming the file, line and function that raised it
+	{
+		template< typename ... Args >
+		Exception( const char *fileName , int line , const char *functionName , const char *format , Args ... args )
+		{
+			char *text = _MakeMessageString( "[EXCEPTION]" , fileName , line , functionName , format , args ... );
+			_message.assign( text );
+			delete[] text;
+		}
+		const char *what( void ) const noexcept { return _message.c_str(); }
+	protected:
+		std::string _message;
+	};
+
+	template< typename ... Args > void Throw( const char *fileName , int line , const char *functionName , const char *format , Args ... args ){ throw Exception( fileName , line , functionName , format , args ... ); } // throws an Exception built from the call site and the printf-style arguments
+	template< typename ... Args > // prints a "[WARNING]" message for the given call site to stderr
+	void Warn( const char *fileName , int line , const char *functionName , const char *format , Args ... args )
+	{
+		char *text = _MakeMessageString( "[WARNING]" , fileName , line , functionName , format , args ... );
+		fprintf( stderr , "%s\n" , text );
+		delete[] text; // the message string is allocated with new[] by _MakeMessageString
+	}
+	template< typename ... Args > // prints an "[ERROR]" message for the given call site to stderr, then terminates the program
+	void ErrorOut( const char *fileName , int line , const char *functionName , const char *format , Args ... args )
+	{
+		char *buffer = _MakeMessageString( "[ERROR]" , fileName , line , functionName , format , args ... );
+		fprintf( stderr , "%s\n" , buffer );
+		delete[] buffer;
+		exit( EXIT_FAILURE ); // an error must not be reported to the calling process as success (status 0)
+	}
+#else // !VERBOSE_MESSAGING
+	// Returns the string "<header> <functionName>: <formatted message>" in a buffer allocated with new[];
+	// the caller releases it with delete[].
+	inline char *_MakeMessageString( const char *header , const char *functionName , const char *format , ... )
+	{
+		va_list args;
+
+		// First pass: measure the formatted message (a negative result signals a formatting error and is treated as empty)
+		va_start( args , format );
+		int messageSize = vsnprintf( NULL , 0 , format , args );
+		va_end( args );
+		if( messageSize<0 ) messageSize = 0;
+
+		// The message is preceded by "<header> " and "<functionName>: "
+		const size_t prefixSize = ( strlen(header)+1 ) + ( strlen(functionName)+2 );
+		char *buffer = new char[ prefixSize + messageSize + 1 ];
+		sprintf( buffer , "%s %s: " , header , functionName );
+
+		// Second pass: a va_list is indeterminate once vsnprintf has consumed it, so it is started anew
+		va_start( args , format );
+		vsnprintf( buffer + prefixSize , (size_t)messageSize+1 , format , args );
+		va_end( args );
+
+		return buffer;
+	}
+	// Exception whose what() text is "[EXCEPTION] <function>: <message>". The constructor only builds that text: terminating the process here would keep Throw() from ever throwing.
+	struct Exception : public std::exception
+	{
+		const char *what( void ) const noexcept { return _message.c_str(); }
+		template< typename ... Args >
+		Exception( const char *functionName , const char *format , Args ... args )
+		{
+			char *buffer = _MakeMessageString( "[EXCEPTION]" , functionName , format , args ... );
+			_message = std::string( buffer );
+			delete[] buffer;
+		}
+	protected:
+		std::string _message;
+	};
+	template< typename ... Args > void Throw( const char *functionName , const char *format , Args ... args ){ throw Exception( functionName , format , args ... ); } // throws an Exception built from the function name and the printf-style arguments
+	template< typename ... Args > // prints a "[WARNING]" message for the given function to stderr
+	void Warn( const char *functionName , const char *format , Args ... args )
+	{
+		char *text = _MakeMessageString( "[WARNING]" , functionName , format , args ... );
+		fprintf( stderr , "%s\n" , text );
+		delete[] text; // the message string is allocated with new[] by _MakeMessageString
+	}
+	template< typename ... Args > void ErrorOut( const char *functionName , const char *format , Args ... args ) // reports an error on stderr and terminates the program
+	{
+		char *buffer = _MakeMessageString( "[ERROR]" , functionName , format , args ... );
+		fprintf( stderr , "%s\n" , buffer );
+		delete[] buffer;
+		exit( EXIT_FAILURE ); // like the VERBOSE_MESSAGING variant, ErrorOut does not return to the caller
+	}
+#endif // VERBOSE_MESSAGING
+}
+#ifdef VERBOSE_MESSAGING // verbose variants also pass the source file and line of the call site
+#ifndef WARN // each macro is only defined if the including code has not already defined it
+#define WARN( ... ) MKExceptions::Warn( __FILE__ , __LINE__ , __FUNCTION__ , __VA_ARGS__ )
+#endif // WARN
+#ifndef WARN_ONCE // warns only the first time the expansion is executed, tracked by a static flag local to the expansion's block
+#define WARN_ONCE( ... ) { static bool firstTime = true ; if( firstTime ) MKExceptions::Warn( __FILE__ , __LINE__ , __FUNCTION__ , __VA_ARGS__ ) ; firstTime = false; }
+#endif // WARN_ONCE
+#ifndef THROW
+#define THROW( ... ) MKExceptions::Throw( __FILE__ , __LINE__ , __FUNCTION__ , __VA_ARGS__ )
+#endif // THROW
+#ifndef ERROR_OUT
+#define ERROR_OUT( ... ) MKExceptions::ErrorOut( __FILE__ , __LINE__ , __FUNCTION__ , __VA_ARGS__ )
+#endif // ERROR_OUT
+#else // !VERBOSE_MESSAGING: only the name of the calling function is passed along
+#ifndef WARN
+#define WARN( ... ) MKExceptions::Warn( __FUNCTION__ , __VA_ARGS__ )
+#endif // WARN
+#ifndef WARN_ONCE // warns only the first time the expansion is executed, tracked by a static flag local to the expansion's block
+#define WARN_ONCE( ... ) { static bool firstTime = true ; if( firstTime ) MKExceptions::Warn( __FUNCTION__ , __VA_ARGS__ ) ; firstTime = false; }
+#endif // WARN_ONCE
+#ifndef THROW
+#define THROW( ... ) MKExceptions::Throw( __FUNCTION__ , __VA_ARGS__ )
+#endif // THROW
+#ifndef ERROR_OUT
+#define ERROR_OUT( ... ) MKExceptions::ErrorOut( __FUNCTION__ , __VA_ARGS__ )
+#endif // ERROR_OUT
+#endif // VERBOSE_MESSAGING
+
+//////////////////
+// Memory Stuff //
+//////////////////
+size_t getPeakRSS( void );    // defined later in this header
+size_t getCurrentRSS( void ); // defined later in this header
+
+struct MemoryInfo
+{
+	static size_t Usage( void ){ return getCurrentRSS(); } // forwards the value of getCurrentRSS() unchanged
+	static int PeakMemoryUsageMB( void ){ return (int)( getPeakRSS()>>20 ); } // getPeakRSS() divided by 2^20 (>>20) and narrowed to int
+};
+#if defined( _WIN32 ) || defined( _WIN64 )
+#include <Windows.h>
+#include <Psapi.h>
+inline void SetPeakMemoryMB( size_t sz ) // puts the current process in a new job object and sets that job's memory limit from sz
+{
+	sz <<= 20; // scale by 2^20 (the parameter is presumably in megabytes, per the function name)
+	SIZE_T peakMemory = sz;
+	HANDLE h = CreateJobObject( NULL , NULL ); // NOTE(review): the handle is neither checked nor closed in this function
+	AssignProcessToJobObject( h , GetCurrentProcess() ); // NOTE(review): the result of the assignment is not checked
+
+	JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
+	jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_JOB_MEMORY;
+	jeli.JobMemoryLimit = peakMemory;
+	if( !SetInformationJobObject( h , JobObjectExtendedLimitInformation , &jeli , sizeof( jeli ) ) ) WARN( "Failed to set memory limit" );
+}
+#else // !_WIN32 && !_WIN64
+#include <sys/time.h> 
+#include <sys/resource.h> 
+inline void SetPeakMemoryMB( size_t sz ) // sets the RLIMIT_AS soft limit of this process to sz * 2^20 bytes, warning if that fails
+{
+	struct rlimit rl;
+	// The current limits are read first so that only rlim_cur changes; if that read fails rl is uninitialized, so nothing is set
+	if( getrlimit( RLIMIT_AS , &rl ) ){ WARN( "Failed to get memory limit" ) ; return; }
+	rl.rlim_cur = (rlim_t)sz << 20;
+	if( setrlimit( RLIMIT_AS , &rl ) ) WARN( "Failed to set memory limit" );
+}
+#endif // _WIN32 || _WIN64
+
+/*
+* Author:  David Robert Nadeau
+* Site:    http://NadeauSoftware.com/
+* License: Creative Commons Attribution 3.0 Unported License
+*          http://creativecommons.org/licenses/by/3.0/deed.en_US
+*/
+
+#if defined(_WIN32) || defined( _WIN64 )
+#include <windows.h>
+#include <psapi.h>
+
+#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
+#include <unistd.h>
+#include <sys/resource.h>
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <mach/mach.h>
+
+#elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__)))
+#include <fcntl.h>
+#include <procfs.h>
+
+#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
+#include <stdio.h>
+
+#endif
+
+#else
+#error "Cannot define getPeakRSS( ) or getCurrentRSS( ) for an unknown OS."
+#endif
+
+
+
+
+
+/**
+* Returns the peak (maximum so far) resident set size (physical
+* memory use) measured in bytes, or zero if the value cannot be
+* determined on this OS.
+*/
+inline size_t getPeakRSS( )
+{
+#if defined(_WIN32)
+	/* Windows -------------------------------------------------- */
+	PROCESS_MEMORY_COUNTERS info;
+	GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); /* NOTE(review): the result of this call is not checked */
+	return (size_t)info.PeakWorkingSetSize;
+
+#elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__)))
+	/* AIX and Solaris ------------------------------------------ */
+	struct psinfo psinfo;
+	int fd = -1;
+	if ( (fd = open( "/proc/self/psinfo", O_RDONLY )) == -1 )
+		return (size_t)0L;      /* Can't open? */
+	if ( read( fd, &psinfo, sizeof(psinfo) ) != sizeof(psinfo) )
+	{
+		close( fd );
+		return (size_t)0L;      /* Can't read? */
+	}
+	close( fd );
+	return (size_t)(psinfo.pr_rssize * 1024L); /* pr_rssize scaled by 1024 (presumably kilobytes to bytes) */
+
+#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
+	/* BSD, Linux, and OSX -------------------------------------- */
+	struct rusage rusage;
+	getrusage( RUSAGE_SELF, &rusage ); /* NOTE(review): the result of this call is not checked */
+#if defined(__APPLE__) && defined(__MACH__)
+	return (size_t)rusage.ru_maxrss; /* returned as-is on Apple platforms */
+#else
+	return (size_t)(rusage.ru_maxrss * 1024L); /* scaled by 1024 elsewhere (presumably kilobytes to bytes) */
+#endif
+
+#else
+	/* Unknown OS ----------------------------------------------- */
+	return (size_t)0L;          /* Unsupported. */
+#endif
+}
+
+
+
+
+
+/**
+* Returns the current resident set size (physical memory use) measured
+* in bytes, or zero if the value cannot be determined on this OS.
+*/
+inline size_t getCurrentRSS( )
+{
+#if defined(_WIN32) || defined( _WIN64 )
+	/* Windows -------------------------------------------------- */
+	PROCESS_MEMORY_COUNTERS info;
+	GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); /* NOTE(review): the result of this call is not checked */
+	return (size_t)info.WorkingSetSize;
+
+#elif defined(__APPLE__) && defined(__MACH__)
+	/* OSX ------------------------------------------------------ */
+	struct mach_task_basic_info info;
+	mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
+	if ( task_info( mach_task_self( ), MACH_TASK_BASIC_INFO,
+		(task_info_t)&info, &infoCount ) != KERN_SUCCESS )
+		return (size_t)0L;      /* Can't access? */
+	return (size_t)info.resident_size;
+
+#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
+	/* Linux ---------------------------------------------------- */
+	long rss = 0L;
+	FILE* fp = NULL;
+	if ( (fp = fopen( "/proc/self/statm", "r" )) == NULL )
+		return (size_t)0L;      /* Can't open? */
+	if ( fscanf( fp, "%*s%ld", &rss ) != 1 ) /* skips the first field and reads the second one into rss */
+	{
+		fclose( fp );
+		return (size_t)0L;      /* Can't read? */
+	}
+	fclose( fp );
+	return (size_t)rss * (size_t)sysconf( _SC_PAGESIZE); /* the value read is multiplied by the page size, i.e. it is treated as a page count */
+
+#else
+	/* AIX, BSD, Solaris, and Unknown OS ------------------------ */
+	return (size_t)0L;          /* Unsupported. */
+#endif
+}
+
+#endif // MY_MISCELLANY_INCLUDED
diff --git a/Src/MyTime.h b/Src/MyTime.h
deleted file mode 100644
index 09c084e..0000000
--- a/Src/MyTime.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-#ifndef MY_TIME_INCLUDED
-#define MY_TIME_INCLUDED
-
-#include <string.h>
-#include <sys/timeb.h>
-#ifndef WIN32
-#include <sys/time.h>
-#endif // WIN32
-
-inline double Time( void )
-{
-#ifdef WIN32
-	struct _timeb t;
-	_ftime( &t );
-	return double( t.time ) + double( t.millitm ) / 1000.0;
-#else // WIN32
-	struct timeval t;
-	gettimeofday( &t , NULL );
-	return t.tv_sec + double( t.tv_usec ) / 1000000;
-#endif // WIN32
-}
-
-#endif // MY_TIME_INCLUDED
diff --git a/Src/Octree.h b/Src/Octree.h
deleted file mode 100644
index 8373396..0000000
--- a/Src/Octree.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-#ifndef OCT_NODE_INCLUDED
-#define OCT_NODE_INCLUDED
-
-#define NEW_OCTNODE_CODE
-
-#include "Allocator.h"
-#include "BinaryNode.h"
-#include "MarchingCubes.h"
-
-
-#define DIMENSION 3
-
-template< class NodeData >
-class OctNode
-{
-private:
-	static int UseAlloc;
-	unsigned long long _depthAndOffset;
-
-	const OctNode* __faceNeighbor( int dir , int off ) const;
-	const OctNode* __edgeNeighbor( int o , const int i[2] , const int idx[2] ) const;
-	OctNode* __faceNeighbor( int dir , int off , int forceChildren , void (*Initializer)( OctNode& ) );
-	OctNode* __edgeNeighbor( int o , const int i[2] , const int idx[2] , int forceChildren , void (*Initializer)( OctNode& ) );
-public:
-	static const int DepthShift , OffsetShift , OffsetShift1 , OffsetShift2 , OffsetShift3;
-	static const int DepthMask , OffsetMask;
-
-	static Allocator< OctNode > NodeAllocator;
-	static int UseAllocator( void );
-	static void SetAllocator( int blockSize );
-
-	OctNode* parent;
-	OctNode* children;
-	NodeData nodeData;
-
-	OctNode( void (*Initializer)( OctNode& )=NULL );
-	static OctNode* NewBrood( void (*initializer)( OctNode& )=NULL );
-	static void ResetDepthAndOffset( OctNode* root , int d , int off[3] );
-	~OctNode( void );
-	int initChildren( void (*Initializer)( OctNode& )=NULL );
-
-	void depthAndOffset( int& depth , int offset[DIMENSION] ) const; 
-	void centerIndex( int index[DIMENSION] ) const;
-	int depth( void ) const;
-	static inline void DepthAndOffset( const long long& index , int& depth , int offset[DIMENSION] );
-	template< class Real > static inline void CenterAndWidth( const long long& index , Point3D< Real >& center , Real& width );
-	template< class Real > static inline void StartAndWidth( const long long& index , Point3D< Real >& start , Real& width );
-	static inline int Depth( const long long& index );
-	static inline void Index( int depth , const int offset[3] , short& d , short off[DIMENSION] );
-	static inline unsigned long long Index( int depth , const int offset[3] );
-	template< class Real > void centerAndWidth( Point3D<Real>& center , Real& width ) const;
-	template< class Real > void startAndWidth( Point3D< Real >& start , Real& width ) const;
-	template< class Real > bool isInside( Point3D< Real > p ) const;
-
-	size_t leaves( void ) const;
-	size_t maxDepthLeaves( int maxDepth ) const;
-	size_t nodes( void ) const;
-	int maxDepth( void ) const;
-
-	const OctNode* root( void ) const;
-
-	const OctNode* nextLeaf( const OctNode* currentLeaf=NULL ) const;
-	OctNode* nextLeaf( OctNode* currentLeaf=NULL );
-	const OctNode* nextNode( const OctNode* currentNode=NULL ) const;
-	OctNode* nextNode( OctNode* currentNode=NULL );
-	const OctNode* nextBranch( const OctNode* current ) const;
-	OctNode* nextBranch( OctNode* current );
-	const OctNode* prevBranch( const OctNode* current ) const;
-	OctNode* prevBranch( OctNode* current );
-
-	void setFullDepth( int maxDepth , void (*Initializer)( OctNode& )=NULL );
-
-	void printLeaves( void ) const;
-	void printRange( void ) const;
-
-	template< class Real > static int CornerIndex( const Point3D<Real>& center , const Point3D<Real> &p );
-
-	OctNode* faceNeighbor( int faceIndex , int forceChildren , void (*Initializer)( OctNode& )=NULL );
-	const OctNode* faceNeighbor( int faceIndex ) const;
-	OctNode* edgeNeighbor( int edgeIndex , int forceChildren , void (*Initializer)( OctNode& )=NULL );
-	const OctNode* edgeNeighbor( int edgeIndex ) const;
-	OctNode* cornerNeighbor( int cornerIndex , int forceChildren , void (*Initializer)( OctNode& )=NULL );
-	const OctNode* cornerNeighbor( int cornerIndex ) const;
-
-	int write( const char* fileName ) const;
-	int write( FILE* fp ) const;
-	int read( const char* fileName , void (*Initializer)( OctNode& )=NULL );
-	int read( FILE* fp , void (*Initializer)( OctNode& )=NULL );
-
-	template< unsigned int Width >
-	struct Neighbors
-	{
-		OctNode* neighbors[Width][Width][Width];
-		Neighbors( void );
-		void clear( void );
-	};
-	template< unsigned int Width >
-	struct ConstNeighbors
-	{
-		const OctNode* neighbors[Width][Width][Width];
-		ConstNeighbors( void );
-		void clear( void );
-	};
-
-	template< unsigned int LeftRadius , unsigned int RightRadius >
-	class NeighborKey
-	{
-		int _depth;
-	public:
-		template< int Width > using Neighbors = typename OctNode::template Neighbors< Width >;
-		static const int Width = LeftRadius + RightRadius + 1;
-		Neighbors< Width >* neighbors;
-
-		NeighborKey( void );
-		NeighborKey( const NeighborKey& key );
-		~NeighborKey( void );
-		int depth( void ) const { return _depth; }
-
-		void set( int depth );
-		template< bool CreateNodes > typename OctNode< NodeData >::template Neighbors< LeftRadius+RightRadius+1 >& getNeighbors( OctNode* node , void (*Initializer)( OctNode& )=NULL );
-		template< bool CreateNodes , unsigned int _LeftRadius , unsigned int _RightRadius > void getNeighbors( OctNode* node , Neighbors< _LeftRadius + _RightRadius + 1 >& neighbors , void (*Initializer)( OctNode& )=NULL );
-		template< bool CreateNodes > bool getChildNeighbors( int cIdx , int d , Neighbors< Width >& childNeighbors , void (*Initializer)( OctNode& )=NULL ) const;
-		template< bool CreateNodes , class Real > bool getChildNeighbors( Point3D< Real > p , int d , Neighbors< Width >& childNeighbors , void (*Initializer)( OctNode& )=NULL ) const;
-		typename OctNode< NodeData >::template Neighbors< LeftRadius+RightRadius+1 >& getNeighbors( const OctNode* node ) { return getNeighbors< false >( (OctNode*)node , NULL ); }
-		template< unsigned int _LeftRadius , unsigned int _RightRadius > void getNeighbors( const OctNode* node , Neighbors< _LeftRadius + _RightRadius + 1 >& neighbors ){ return getNeighbors< false , _LeftRadius , _RightRadius >( (OctNode*)node , NULL ); }
-		bool getChildNeighbors( int cIdx , int d , Neighbors< Width >& childNeighbors ) const { return getChildNeighbors< false >( cIdx , d , childNeighbors , NULL ); }
-		template< class Real > bool getChildNeighbors( Point3D< Real > p , int d , Neighbors< Width >& childNeighbors ) const { return getChildNeighbors< false , Real >( p , d , childNeighbors , NULL ); }
-	};
-
-	template< unsigned int LeftRadius , unsigned int RightRadius >
-	class ConstNeighborKey
-	{
-		int _depth;
-	public:
-		template< int Width > using Neighbors = typename OctNode::template ConstNeighbors< Width >;
-		static const int Width = LeftRadius + RightRadius + 1;
-		ConstNeighbors< Width >* neighbors;
-
-		ConstNeighborKey( void );
-		ConstNeighborKey( const ConstNeighborKey& key );
-		~ConstNeighborKey( void );
-		int depth( void ) const { return _depth; }
-
-		void set( int depth );
-		typename OctNode< NodeData >::template ConstNeighbors< LeftRadius+RightRadius+1 >& getNeighbors( const OctNode* node );
-		template< unsigned int _LeftRadius , unsigned int _RightRadius > void getNeighbors( const OctNode* node , ConstNeighbors< _LeftRadius + _RightRadius + 1 >& neighbors );
-	};
-
-	void centerIndex( int maxDepth , int index[DIMENSION] ) const;
-	int width( int maxDepth ) const;
-};
-
-
-#include "Octree.inl"
-
-#endif // OCT_NODE_INCLUDED
diff --git a/Src/Octree.inl b/Src/Octree.inl
deleted file mode 100644
index 40202f9..0000000
--- a/Src/Octree.inl
+++ /dev/null
@@ -1,1135 +0,0 @@
-/*
-Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of
-conditions and the following disclaimer. Redistributions in binary form must reproduce
-the above copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the distribution. 
-
-Neither the name of the Johns Hopkins University nor the names of its contributors
-may be used to endorse or promote products derived from this software without specific
-prior written permission. 
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-*/
-
-#include <stdlib.h>
-#include <math.h>
-#include <algorithm>
-#include <functional>
-
-/////////////
-// OctNode //
-/////////////
-template< class NodeData > const int OctNode< NodeData >::DepthShift=5;
-template< class NodeData > const int OctNode< NodeData >::OffsetShift = ( sizeof(long long)*8 - DepthShift ) / 3;
-template< class NodeData > const int OctNode< NodeData >::DepthMask=(1<<DepthShift)-1;
-template< class NodeData > const int OctNode< NodeData >::OffsetMask=(1<<OffsetShift)-1;
-template< class NodeData > const int OctNode< NodeData >::OffsetShift1=DepthShift;
-template< class NodeData > const int OctNode< NodeData >::OffsetShift2=OffsetShift1+OffsetShift;
-template< class NodeData > const int OctNode< NodeData >::OffsetShift3=OffsetShift2+OffsetShift;
-
-template< class NodeData > int OctNode< NodeData >::UseAlloc=0;
-template< class NodeData > Allocator<OctNode< NodeData > > OctNode< NodeData >::NodeAllocator;
-
-template< class NodeData >
-void OctNode< NodeData >::SetAllocator(int blockSize)
-{
-	if(blockSize>0)
-	{
-		UseAlloc=1;
-		NodeAllocator.set(blockSize);
-	}
-	else{UseAlloc=0;}
-}
-template< class NodeData > int OctNode< NodeData >::UseAllocator( void ){ return UseAlloc; }
-
-template< class NodeData >
-OctNode< NodeData >::OctNode( void (*Initializer)( OctNode& ) )
-{
-	parent = children = NULL;
-	_depthAndOffset = 0;
-	if( Initializer ) Initializer( *this );
-}
-template< class NodeData >
-OctNode< NodeData >::~OctNode( void )
-{
-	if( !UseAlloc && children ) delete[] children;
-	parent = children = NULL;
-}
-
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::NewBrood( void (*Initializer)( OctNode& ) )
-{
-	OctNode< NodeData >* brood;
-	if( UseAlloc ) brood = NodeAllocator.newElements( Cube::CORNERS );
-	else brood = new OctNode[Cube::CORNERS];
-	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ )
-	{
-		int off[] = { i , j , k };
-		int idx = Cube::CornerIndex( i , j , k );
-		if( Initializer ) Initializer( brood[idx] );
-		brood[idx]._depthAndOffset = Index( 0 , off );
-	}
-	return brood;
-}
-template< class NodeData >
-void OctNode< NodeData >::ResetDepthAndOffset( OctNode* root , int d , int off[3] )
-{
-	// Recursive lambda requires an explicit declaration
-#define PARENT_DEPTH_AND_OFFSET( d , off ) ( d-- , off[0]>>=1 , off[1]>>=1 , off[2]>>=1 )
-#define  CHILD_DEPTH_AND_OFFSET( d , off ) ( d++ , off[0]<<=1 , off[1]<<=1 , off[2]<<=1 )
-	std::function< OctNode* ( OctNode* , int& , int[] ) > _nextBranch = [&]( OctNode* current , int& d , int off[3] )
-	{
-		if( current==root ) return (OctNode*)NULL;
-		else
-		{
-			int c = (int)( current - current->parent->children );
-
-			if( c==Cube::CORNERS-1 )
-			{
-				PARENT_DEPTH_AND_OFFSET( d , off );
-				return _nextBranch( current->parent , d , off );
-			}
-			else
-			{
-				int x , y , z;
-				Cube::FactorCornerIndex( c+1 , x , y , z );
-				PARENT_DEPTH_AND_OFFSET( d , off ) ; CHILD_DEPTH_AND_OFFSET( d , off );
-				off[0] |= x , off[1] |= y , off[2] |= z;
-				return current+1;
-			}
-		}
-	};
-	auto _nextNode = [&]( OctNode* current , int& d , int off[3] )
-	{
-		if( !current ) return root;
-		else if( current->children )
-		{
-			CHILD_DEPTH_AND_OFFSET( d , off );
-			return current->children;
-		}
-		else return _nextBranch( current , d , off );
-	};
-#undef PARENT_DEPTH_AND_OFFSET
-#undef  CHILD_DEPTH_AND_OFFSET
-	for( OctNode* node=_nextNode( NULL , d , off ) ; node ; node = _nextNode( node , d , off ) ) node->_depthAndOffset = Index( d , off );
-}
-
-template< class NodeData >
-void OctNode< NodeData >::setFullDepth( int maxDepth , void (*Initializer)( OctNode& ) )
-{
-	if( maxDepth )
-	{
-		if( !children ) initChildren( Initializer );
-		for( int i=0 ; i<8 ; i++ ) children[i].setFullDepth( maxDepth-1 , Initializer );
-	}
-}
-
-template< class NodeData >
-int OctNode< NodeData >::initChildren( void (*Initializer)( OctNode& ) )
-{
-	{
-		if( UseAlloc ) children = NodeAllocator.newElements( Cube::CORNERS );
-		else
-		{
-			if( children ) delete[] children;
-			children = new OctNode[Cube::CORNERS];
-		}
-		if( !children ) fprintf( stderr , "[ERROR] OctNode::initChildren: Failed to initialize children in OctNode::initChildren\n" ) , exit(0);
-		int d , off[3];
-		depthAndOffset( d , off );
-		for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ )
-		{
-			int idx=Cube::CornerIndex(i,j,k);
-			children[idx].parent = this;
-			children[idx].children = NULL;
-			if( Initializer ) Initializer( children[idx] );
-			int off2[3];
-			off2[0] = (off[0]<<1)+i;
-			off2[1] = (off[1]<<1)+j;
-			off2[2] = (off[2]<<1)+k;
-			children[idx]._depthAndOffset = Index( d+1 , off2 );
-		}
-	}
-	return 1;
-}
-template< class NodeData >
-inline void OctNode< NodeData >::Index(int depth,const int offset[3],short& d,short off[3]){
-	d=short(depth);
-	off[0]=short((1<<depth)+offset[0]-1);
-	off[1]=short((1<<depth)+offset[1]-1);
-	off[2]=short((1<<depth)+offset[2]-1);
-}
-
-template< class NodeData >
-inline void OctNode< NodeData >::depthAndOffset( int& depth , int offset[DIMENSION] ) const
-{
-	depth = int( _depthAndOffset & DepthMask );
-	offset[0] = int( (_depthAndOffset>>OffsetShift1) & OffsetMask );
-	offset[1] = int( (_depthAndOffset>>OffsetShift2) & OffsetMask );
-	offset[2] = int( (_depthAndOffset>>OffsetShift3) & OffsetMask );
-}
-template< class NodeData >
-inline void OctNode< NodeData >::centerIndex( int index[DIMENSION] ) const
-{
-	int d , off[DIMENSION];
-	depthAndOffset( d , off );
-	for( int i=0 ; i<DIMENSION ; i++ ) index[i] = BinaryNode::CenterIndex( d , off[i] );
-}
-template< class NodeData >
-inline unsigned long long OctNode< NodeData >::Index( int depth , const int offset[3] )
-{
-	unsigned long long idx=0;
-	idx |= ( ( (unsigned long long)(depth    ) ) & DepthMask  );
-	idx |= ( ( (unsigned long long)(offset[0]) ) & OffsetMask ) << OffsetShift1;
-	idx |= ( ( (unsigned long long)(offset[1]) ) & OffsetMask ) << OffsetShift2;
-	idx |= ( ( (unsigned long long)(offset[2]) ) & OffsetMask ) << OffsetShift3;
-	return idx;
-}
-template< class NodeData >
-inline int OctNode< NodeData >::depth( void ) const {return int( _depthAndOffset & DepthMask );}
-template< class NodeData >
-inline void OctNode< NodeData >::DepthAndOffset(const long long& index,int& depth,int offset[3]){
-	depth=int(index&DepthMask);
-	offset[0]=(int((index>>OffsetShift1)&OffsetMask)+1)&(~(1<<depth));
-	offset[1]=(int((index>>OffsetShift2)&OffsetMask)+1)&(~(1<<depth));
-	offset[2]=(int((index>>OffsetShift3)&OffsetMask)+1)&(~(1<<depth));
-}
-template< class NodeData >
-inline int OctNode< NodeData >::Depth(const long long& index){return int(index&DepthMask);}
-template< class NodeData >
-template< class Real >
-void OctNode< NodeData >::centerAndWidth( Point3D<Real>& center , Real& width ) const
-{
-	int depth , offset[3];
-	depthAndOffset( depth , offset );
-	width = Real( 1.0 / (1<<depth) );
-	for( int dim=0 ; dim<DIMENSION ; dim++ ) center.coords[dim] = Real( 0.5+offset[dim] ) * width;
-}
-template< class NodeData >
-template< class Real >
-void OctNode< NodeData >::startAndWidth( Point3D<Real>& start , Real& width ) const
-{
-	int depth , offset[3];
-	depthAndOffset( depth , offset );
-	width = Real( 1.0 / (1<<depth) );
-	for( int dim=0 ; dim<DIMENSION ; dim++ ) start.coords[dim] = Real( offset[dim] ) * width;
-}
-template< class NodeData >
-template< class Real >
-bool OctNode< NodeData >::isInside( Point3D< Real > p ) const
-{
-	Point3D< Real > c;
-	Real w;
-	centerAndWidth( c , w );
-	w /= 2;
-	return (c[0]-w)<p[0] && p[0]<=(c[0]+w) && (c[1]-w)<p[1] && p[1]<=(c[1]+w) && (c[2]-w)<p[2] && p[2]<=(c[2]+w);
-}
-template< class NodeData >
-template< class Real >
-inline void OctNode< NodeData >::CenterAndWidth(const long long& index,Point3D<Real>& center,Real& width){
-	int depth,offset[3];
-	depth=index&DepthMask;
-	offset[0]=(int((index>>OffsetShift1)&OffsetMask)+1)&(~(1<<depth));
-	offset[1]=(int((index>>OffsetShift2)&OffsetMask)+1)&(~(1<<depth));
-	offset[2]=(int((index>>OffsetShift3)&OffsetMask)+1)&(~(1<<depth));
-	width=Real(1.0/(1<<depth));
-	for(int dim=0;dim<DIMENSION;dim++){center.coords[dim]=Real(0.5+offset[dim])*width;}
-}
-template< class NodeData >
-template< class Real >
-inline void OctNode< NodeData >::StartAndWidth( const long long& index , Point3D< Real >& start , Real& width )
-{
-	int depth,offset[3];
-	depth = index&DepthMask;
-	offset[0] = (int((index>>OffsetShift1)&OffsetMask)+1)&(~(1<<depth));
-	offset[1] = (int((index>>OffsetShift2)&OffsetMask)+1)&(~(1<<depth));
-	offset[2] = (int((index>>OffsetShift3)&OffsetMask)+1)&(~(1<<depth));
-	width = Real(1.0/(1<<depth));
-	for( int dim=0 ; dim<DIMENSION ; dim++ ) start.coords[dim] = Real(offset[dim])*width;
-}
-
-template< class NodeData >
-int OctNode< NodeData >::maxDepth(void) const{
-	if(!children){return 0;}
-	else{
-		int c,d;
-		for(int i=0;i<Cube::CORNERS;i++){
-			d=children[i].maxDepth();
-			if(!i || d>c){c=d;}
-		}
-		return c+1;
-	}
-}
-template< class NodeData >
-size_t OctNode< NodeData >::nodes( void ) const
-{
-	if( !children ) return 1;
-	else
-	{
-		size_t c=0;
-		for( int i=0 ; i<Cube::CORNERS ; i++ ) c += children[i].nodes();
-		return c+1;
-	}
-}
-template< class NodeData >
-size_t OctNode< NodeData >::leaves( void ) const
-{
-	if( !children ) return 1;
-	else
-	{
-		size_t c=0;
-		for( int i=0 ; i<Cube::CORNERS ; i++ ) c += children[i].leaves();
-		return c;
-	}
-}
-template< class NodeData >
-size_t OctNode< NodeData >::maxDepthLeaves( int maxDepth ) const
-{
-	if( depth()>maxDepth ) return 0;
-	if( !children ) return 1;
-	else
-	{
-		size_t c=0;
-		for( int i=0 ; i<Cube::CORNERS ; i++ ) c += children[i].maxDepthLeaves(maxDepth);
-		return c;
-	}
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::root(void) const{
-	const OctNode* temp=this;
-	while(temp->parent){temp=temp->parent;}
-	return temp;
-}
-
-
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::nextBranch( const OctNode* current ) const
-{
-	if( !current->parent || current==this ) return NULL;
-	if( current-current->parent->children==Cube::CORNERS-1 ) return nextBranch( current->parent );
-	else return current+1;
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::nextBranch(OctNode* current){
-	if(!current->parent || current==this){return NULL;}
-	if(current-current->parent->children==Cube::CORNERS-1){return nextBranch(current->parent);}
-	else{return current+1;}
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::prevBranch( const OctNode* current ) const
-{
-	if( !current->parent || current==this ) return NULL;
-	if( current-current->parent->children==0 ) return prevBranch( current->parent );
-	else return current-1;
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::prevBranch( OctNode* current )
-{
-	if( !current->parent || current==this ) return NULL;
-	if( current-current->parent->children==0 ) return prevBranch( current->parent );
-	else return current-1;
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::nextLeaf(const OctNode* current) const{
-	if( !current )
-	{
-		const OctNode< NodeData >* temp=this;
-		while( temp->children ) temp=&temp->children[0];
-		return temp;
-	}
-	if( current->children ) return current->nextLeaf();
-	const OctNode* temp = nextBranch(current);
-	if( !temp ) return NULL;
-	else return temp->nextLeaf();
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::nextLeaf( OctNode* current )
-{
-	if( !current )
-	{
-		OctNode< NodeData >* temp=this;
-		while(temp->children){temp=&temp->children[0];}
-		return temp;
-	}
-	if(current->children){return current->nextLeaf();}
-	OctNode* temp=nextBranch(current);
-	if(!temp){return NULL;}
-	else{return temp->nextLeaf();}
-}
-
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::nextNode( const OctNode* current ) const
-{
-	if( !current ) return this;
-	else if( current->children ) return &current->children[0];
-	else return nextBranch(current);
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::nextNode( OctNode* current )
-{
-	if( !current ) return this;
-	else if( current->children ) return &current->children[0];
-	else return nextBranch( current );
-}
-
-template< class NodeData >
-void OctNode< NodeData >::printRange(void) const
-{
-	Point3D< float > center;
-	float width;
-	centerAndWidth(center,width);
-	for(int dim=0;dim<DIMENSION;dim++){
-		printf("%[%f,%f]",center.coords[dim]-width/2,center.coords[dim]+width/2);
-		if(dim<DIMENSION-1){printf("x");}
-		else printf("\n");
-	}
-}
-
-template< class NodeData >
-template< class Real >
-int OctNode< NodeData >::CornerIndex(const Point3D<Real>& center,const Point3D<Real>& p){
-	int cIndex=0;
-	if(p.coords[0]>center.coords[0]){cIndex|=1;}
-	if(p.coords[1]>center.coords[1]){cIndex|=2;}
-	if(p.coords[2]>center.coords[2]){cIndex|=4;}
-	return cIndex;
-}
-
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::faceNeighbor( int faceIndex , int forceChildren , void (*Initializer)( OctNode& ) ){return __faceNeighbor( faceIndex>>1 , faceIndex&1 , forceChildren , Initializer ); }
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::faceNeighbor(int faceIndex) const {return __faceNeighbor(faceIndex>>1,faceIndex&1);}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::__faceNeighbor( int dir , int off , int forceChildren , void (*Initializer)( OctNode& ) )
-{
-	if(!parent){return NULL;}
-	int pIndex=int(this-parent->children);
-	pIndex^=(1<<dir);
-	if((pIndex & (1<<dir))==(off<<dir)){return &parent->children[pIndex];}
-	else{
-		OctNode* temp=parent->__faceNeighbor(dir,off,forceChildren);
-		if( !temp ) return NULL;
-		if( !temp->children )
-		{
-			if( forceChildren ) temp->initChildren( Initializer );
-			else return temp;
-		}
-		return &temp->children[pIndex];
-	}
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::__faceNeighbor(int dir,int off) const {
-	if(!parent){return NULL;}
-	int pIndex=int(this-parent->children);
-	pIndex^=(1<<dir);
-	if((pIndex & (1<<dir))==(off<<dir)){return &parent->children[pIndex];}
-	else{
-		const OctNode* temp=parent->__faceNeighbor(dir,off);
-		if(!temp || !temp->children){return temp;}
-		else{return &temp->children[pIndex];}
-	}
-}
-
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::edgeNeighbor( int edgeIndex , int forceChildren , void (*Initializer)( OctNode& ) )
-{
-	int idx[2],o,i[2];
-	Cube::FactorEdgeIndex( edgeIndex , o , i[0] , i[1] );
-	switch(o){
-		case 0:	idx[0]=1;	idx[1]=2;	break;
-		case 1:	idx[0]=0;	idx[1]=2;	break;
-		case 2:	idx[0]=0;	idx[1]=1;	break;
-	};
-	return __edgeNeighbor( o , i , idx , forceChildren , Initializer );
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::edgeNeighbor(int edgeIndex) const {
-	int idx[2],o,i[2];
-	Cube::FactorEdgeIndex(edgeIndex,o,i[0],i[1]);
-	switch(o){
-		case 0:	idx[0]=1;	idx[1]=2;	break;
-		case 1:	idx[0]=0;	idx[1]=2;	break;
-		case 2:	idx[0]=0;	idx[1]=1;	break;
-	};
-	return __edgeNeighbor(o,i,idx);
-}
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::__edgeNeighbor(int o,const int i[2],const int idx[2]) const{
-	if(!parent){return NULL;}
-	int pIndex=int(this-parent->children);
-	int aIndex,x[DIMENSION];
-
-	Cube::FactorCornerIndex(pIndex,x[0],x[1],x[2]);
-	aIndex=(~((i[0] ^ x[idx[0]]) | ((i[1] ^ x[idx[1]])<<1))) & 3;
-	pIndex^=(7 ^ (1<<o));
-	if(aIndex==1)	{	// I can get the neighbor from the parent's face adjacent neighbor
-		const OctNode* temp=parent->__faceNeighbor(idx[0],i[0]);
-		if(!temp || !temp->children){return NULL;}
-		else{return &temp->children[pIndex];}
-	}
-	else if(aIndex==2)	{	// I can get the neighbor from the parent's face adjacent neighbor
-		const OctNode* temp=parent->__faceNeighbor(idx[1],i[1]);
-		if(!temp || !temp->children){return NULL;}
-		else{return &temp->children[pIndex];}
-	}
-	else if(aIndex==0)	{	// I can get the neighbor from the parent
-		return &parent->children[pIndex];
-	}
-	else if(aIndex==3)	{	// I can get the neighbor from the parent's edge adjacent neighbor
-		const OctNode* temp=parent->__edgeNeighbor(o,i,idx);
-		if(!temp || !temp->children){return temp;}
-		else{return &temp->children[pIndex];}
-	}
-	else{return NULL;}
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::__edgeNeighbor( int o , const int i[2] , const int idx[2] , int forceChildren , void (*Initializer)( OctNode& ) )
-{
-	if(!parent){return NULL;}
-	int pIndex=int(this-parent->children);
-	int aIndex,x[DIMENSION];
-
-	Cube::FactorCornerIndex(pIndex,x[0],x[1],x[2]);
-	aIndex=(~((i[0] ^ x[idx[0]]) | ((i[1] ^ x[idx[1]])<<1))) & 3;
-	pIndex^=(7 ^ (1<<o));
-	if(aIndex==1)	{	// I can get the neighbor from the parent's face adjacent neighbor
-		OctNode* temp=parent->__faceNeighbor(idx[0],i[0],0);
-		if(!temp || !temp->children){return NULL;}
-		else{return &temp->children[pIndex];}
-	}
-	else if(aIndex==2)	{	// I can get the neighbor from the parent's face adjacent neighbor
-		OctNode* temp=parent->__faceNeighbor(idx[1],i[1],0);
-		if(!temp || !temp->children){return NULL;}
-		else{return &temp->children[pIndex];}
-	}
-	else if(aIndex==0)	{	// I can get the neighbor from the parent
-		return &parent->children[pIndex];
-	}
-	else if(aIndex==3)	{	// I can get the neighbor from the parent's edge adjacent neighbor
-		OctNode* temp=parent->__edgeNeighbor(o,i,idx,forceChildren);
-		if( !temp ) return NULL;
-		if( !temp->children )
-		{
-			if( forceChildren ) temp->initChildren( Initializer );
-			else return temp;
-		}
-		return &temp->children[pIndex];
-	}
-	else{return NULL;}
-}
-
-template< class NodeData >
-const OctNode< NodeData >* OctNode< NodeData >::cornerNeighbor(int cornerIndex) const {
-	int pIndex,aIndex=0;
-	if(!parent){return NULL;}
-
-	pIndex=int(this-parent->children);
-	aIndex=(cornerIndex ^ pIndex);	// The disagreement bits
-	pIndex=(~pIndex)&7;				// The antipodal point
-	if(aIndex==7){					// Agree on no bits
-		return &parent->children[pIndex];
-	}
-	else if(aIndex==0){				// Agree on all bits
-		const OctNode* temp=((const OctNode*)parent)->cornerNeighbor(cornerIndex);
-		if(!temp || !temp->children){return temp;}
-		else{return &temp->children[pIndex];}
-	}
-	else if(aIndex==6){				// Agree on face 0
-		const OctNode* temp=((const OctNode*)parent)->__faceNeighbor(0,cornerIndex & 1);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==5){				// Agree on face 1
-		const OctNode* temp=((const OctNode*)parent)->__faceNeighbor(1,(cornerIndex & 2)>>1);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==3){				// Agree on face 2
-		const OctNode* temp=((const OctNode*)parent)->__faceNeighbor(2,(cornerIndex & 4)>>2);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==4){				// Agree on edge 2
-		const OctNode* temp=((const OctNode*)parent)->edgeNeighbor(8 | (cornerIndex & 1) | (cornerIndex & 2) );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==2){				// Agree on edge 1
-		const OctNode* temp=((const OctNode*)parent)->edgeNeighbor(4 | (cornerIndex & 1) | ((cornerIndex & 4)>>1) );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==1){				// Agree on edge 0
-		const OctNode* temp=((const OctNode*)parent)->edgeNeighbor(((cornerIndex & 2) | (cornerIndex & 4))>>1 );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else{return NULL;}
-}
-template< class NodeData >
-OctNode< NodeData >* OctNode< NodeData >::cornerNeighbor( int cornerIndex , int forceChildren , void (*Initializer)( OctNode& ) )
-{
-	int pIndex,aIndex=0;
-	if(!parent){return NULL;}
-
-	pIndex=int(this-parent->children);
-	aIndex=(cornerIndex ^ pIndex);	// The disagreement bits
-	pIndex=(~pIndex)&7;				// The antipodal point
-	if(aIndex==7){					// Agree on no bits
-		return &parent->children[pIndex];
-	}
-	else if(aIndex==0){				// Agree on all bits
-		OctNode* temp=((OctNode*)parent)->cornerNeighbor( cornerIndex , forceChildren , Initializer );
-		if( !temp ) return NULL;
-		if( !temp->children )
-		{
-			if(forceChildren) temp->initChildren( Initializer );
-			else return temp;
-		}
-		return &temp->children[pIndex];
-	}
-	else if(aIndex==6){				// Agree on face 0
-		OctNode* temp=((OctNode*)parent)->__faceNeighbor(0,cornerIndex & 1,0);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==5){				// Agree on face 1
-		OctNode* temp=((OctNode*)parent)->__faceNeighbor(1,(cornerIndex & 2)>>1,0);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==3){				// Agree on face 2
-		OctNode* temp=((OctNode*)parent)->__faceNeighbor(2,(cornerIndex & 4)>>2,0);
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==4){				// Agree on edge 2
-		OctNode* temp=((OctNode*)parent)->edgeNeighbor(8 | (cornerIndex & 1) | (cornerIndex & 2) );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==2){				// Agree on edge 1
-		OctNode* temp=((OctNode*)parent)->edgeNeighbor(4 | (cornerIndex & 1) | ((cornerIndex & 4)>>1) );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else if(aIndex==1){				// Agree on edge 0
-		OctNode* temp=((OctNode*)parent)->edgeNeighbor(((cornerIndex & 2) | (cornerIndex & 4))>>1 );
-		if(!temp || !temp->children){return NULL;}
-		else{return & temp->children[pIndex];}
-	}
-	else{return NULL;}
-}
-
-////////////////////////
-// OctNode::Neighbors //
-////////////////////////
-template< class NodeData >
-template< unsigned int Width >
-OctNode< NodeData >::Neighbors< Width >::Neighbors( void ){ clear(); }
-template< class NodeData >
-template< unsigned int Width >
-void OctNode< NodeData >::Neighbors< Width >::clear( void ){ for( int i=0 ; i<Width ; i++ ) for( int j=0 ; j<Width ; j++ ) for( int k=0 ; k<Width ; k++ ) neighbors[i][j][k]=NULL; }
-
-/////////////////////////////
-// OctNode::ConstNeighbors //
-/////////////////////////////
-template< class NodeData >
-template< unsigned int Width >
-OctNode< NodeData >::ConstNeighbors< Width >::ConstNeighbors( void ){ clear(); }
-template< class NodeData >
-template< unsigned int Width >
-void OctNode< NodeData >::ConstNeighbors< Width >::clear( void ){ for( int i=0 ; i<Width ; i++ ) for( int j=0 ; j<Width ; j++ ) for( int k=0 ; k<Width ; k++ ) neighbors[i][j][k]=NULL; }
-
-//////////////////////////
-// OctNode::NeighborKey //
-//////////////////////////
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::NeighborKey( void ){ _depth=-1 , neighbors=NULL; }
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::NeighborKey( const NeighborKey& nKey )
-{
-	_depth = 0 , neighbors = NULL;
-	set( nKey._depth );
-	for( int d=0 ; d<=_depth ; d++ ) memcpy( &neighbors[d] , &nKey.neighbors[d] , sizeof( Neighbors< Width > ) );
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::~NeighborKey( void )
-{
-	if( neighbors ) delete[] neighbors;
-	neighbors = NULL;
-}
-
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-void OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::set( int d )
-{
-	if( neighbors ) delete[] neighbors;
-	neighbors = NULL;
-	_depth = d;
-	if( d<0 ) return;
-	neighbors = new Neighbors< Width >[d+1];
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-template< bool CreateNodes >
-bool OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::getChildNeighbors( int cIdx , int d , Neighbors< Width >& cNeighbors , void (*Initializer)( OctNode& ) ) const
-{
-	Neighbors< Width >& pNeighbors = neighbors[d];
-	// Check that we actuall have a center node
-	if( !pNeighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] ) return false;
-	
-	// Get the indices of the child node that would contain the point (and its antipode)
-	int cx , cy , cz;
-	Cube::FactorCornerIndex( cIdx , cx , cy , cz );
-
-
-	// Iterate over the finer neighbors and set them (if you can)
-	// Here:
-	// (x,y,z) give the position of the finer nodes relative to the center,
-	// (_x,_y,_z) give a positive global position, up to an even offset, and
-	// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-	for( int z=-(int)LeftRadius ; z<=(int)RightRadius ; z++ )
-	{
-		int _z = (z+cz) + (LeftRadius<<1) , pz = ( _z>>1 ) , zz = z+LeftRadius;
-		for( int y=-(int)LeftRadius ; y<=(int)RightRadius ; y++ )
-		{
-			int _y = (y+cy) + (LeftRadius<<1) , py = ( _y>>1 ) , yy = y+LeftRadius;
-
-			int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-			for( int x=-(int)LeftRadius ; x<=(int)RightRadius ; x++ )
-			{
-				int _x = (x+cx) + (LeftRadius<<1) , px = ( _x>>1 ) , xx = x+LeftRadius;
-
-				if( CreateNodes )
-				{
-					if( pNeighbors.neighbors[px][py][pz] )
-					{
-						if( !pNeighbors.neighbors[px][py][pz]->children ) pNeighbors.neighbors[px][py][pz]->initChildren( Initializer );
-						cNeighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-					}
-					else cNeighbors.neighbors[xx][yy][zz] = NULL;
-				}
-				else
-				{
-					if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-						cNeighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-					else cNeighbors.neighbors[xx][yy][zz] = NULL;
-				}
-			}
-		}
-	}
-	return true;
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-template< bool CreateNodes , class Real >
-bool OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::getChildNeighbors( Point3D< Real > p , int d , Neighbors< Width >& cNeighbors , void (*Initializer)( OctNode& ) ) const
-{
-	Neighbors< Width >& pNeighbors = neighbors[d];
-	// Check that we actuall have a center node
-	if( !pNeighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] ) return false;
-	Point3D< Real > c;
-	Real w;
-	pNeighbors.neighbors[LeftRadius][LeftRadius][LeftRadius]->centerAndWidth( c , w );
-	return getChildNeighbors< CreateNodes >( CornerIndex( c , p ) , d , cNeighbors , Initializer );
-}
-
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-template< bool CreateNodes >
-typename OctNode< NodeData >::template Neighbors< LeftRadius+RightRadius+1 >& OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::getNeighbors( OctNode< NodeData >* node , void (*Initializer)( OctNode& ) )
-{
-	Neighbors< Width >& neighbors = this->neighbors[ node->depth() ];
-	if( node==neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] )
-	{
-		bool reset = false;
-		for( int i=0 ; i<Width ; i++ ) for( int j=0 ; j<Width ; j++ ) for( int k=0 ; k<Width ; k++ ) if( !neighbors.neighbors[i][j][k] ) reset = true;
-		if( reset ) neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] = NULL;
-	}
-	if( node!=neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] )
-	{
-		neighbors.clear();
-
-		if( !node->parent ) neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] = node;
-		else
-		{
-			Neighbors< Width >& pNeighbors = getNeighbors< CreateNodes >( node->parent , Initializer );
-
-			// Get the indices of the child node that would contain the point (and its antipode)
-			int cx , cy , cz;
-			Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-			// Iterate over the finer neighbors and set them (if you can)
-			// Here:
-			// (x,y,z) give the position of the finer nodes relative to the center,
-			// (_x,_y,_z) give a positive global position, up to an even offset, and
-			// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-			for( int z=-(int)LeftRadius ; z<=(int)RightRadius ; z++ )
-			{
-				int _z = (z+cz) + (LeftRadius<<1) , pz = ( _z>>1 ) , zz = z+LeftRadius;
-				for( int y=-(int)LeftRadius ; y<=(int)RightRadius ; y++ )
-				{
-					int _y = (y+cy) + (LeftRadius<<1) , py = ( _y>>1 ) , yy = y+LeftRadius;
-
-					int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-					for( int x=-(int)LeftRadius ; x<=(int)RightRadius ; x++ )
-					{
-						int _x = (x+cx) + (LeftRadius<<1) , px = ( _x>>1 ) , xx = x+LeftRadius;
-						if( CreateNodes )
-						{
-							if( pNeighbors.neighbors[px][py][pz] )
-							{
-								if( !pNeighbors.neighbors[px][py][pz]->children ) pNeighbors.neighbors[px][py][pz]->initChildren( Initializer );
-								neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-							}
-							else neighbors.neighbors[xx][yy][zz] = NULL;
-						}
-						else
-						{
-							if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-								neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-							else neighbors.neighbors[xx][yy][zz] = NULL;
-						}
-					}
-				}
-			}
-		}
-	}
-	return neighbors;
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-template< bool CreateNodes , unsigned int _LeftRadius , unsigned int _RightRadius >
-void OctNode< NodeData >::NeighborKey< LeftRadius , RightRadius >::getNeighbors( OctNode< NodeData >* node , Neighbors< _LeftRadius + _RightRadius + 1 >& neighbors , void (*Initializer)( OctNode& ) )
-{
-	neighbors.clear();
-	if( !node ) return;
-
-	// [WARNING] This estimate of the required radius is somewhat conservative if the radius is odd (depending on where the node is relative to its parent)
-	const unsigned int _PLeftRadius = (_LeftRadius+1)/2 , _PRightRadius = (_RightRadius+1)/2;
-	// If we are at the root of the tree, we are done
-	if( !node->parent ) neighbors.neighbors[_LeftRadius][_LeftRadius][_LeftRadius] = node;
-	// If we can get the data from the the key for the parent node, do that
-	else if( _PLeftRadius<=LeftRadius && _PRightRadius<=RightRadius )
-	{
-		getNeighbors< CreateNodes >( node->parent , Initializer );
-		const Neighbors< LeftRadius + RightRadius + 1 >& pNeighbors = this->neighbors[ node->depth()-1 ];
-		// Get the indices of the child node that would contain the point (and its antipode)
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-		// Iterate over the finer neighbors
-		// Here:
-		// (x,y,z) give the position of the finer nodes relative to the center,
-		// (_x,_y,_z) give a positive global position, up to an even offset, and
-		// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-		for( int z=-(int)_LeftRadius ; z<=(int)_RightRadius ; z++ )
-		{
-			int _z = (z+cz) + (_LeftRadius<<1) , pz = ( _z>>1 ) - _LeftRadius + LeftRadius , zz = z + _LeftRadius;
-			for( int y=-(int)_LeftRadius ; y<=(int)_RightRadius ; y++ )
-			{
-				int _y = (y+cy) + (_LeftRadius<<1) , py = ( _y>>1 ) - _LeftRadius + LeftRadius , yy = y + _LeftRadius;
-
-				int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-				for( int x=-(int)_LeftRadius ; x<=(int)_RightRadius ; x++ )
-				{
-					int _x = (x+cx) + (_LeftRadius<<1) , px = ( _x>>1 ) - _LeftRadius + LeftRadius , xx = x + _LeftRadius;
-					if( CreateNodes )
-					{
-						if( pNeighbors.neighbors[px][py][pz] )
-						{
-							if( !pNeighbors.neighbors[px][py][pz]->children ) pNeighbors.neighbors[px][py][pz]->initChildren( Initializer );
-							neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-						}
-						else neighbors.neighbors[xx][yy][zz] = NULL;
-					}
-					else
-					{
-						if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-							neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-						else neighbors.neighbors[xx][yy][zz] = NULL;
-					}
-				}
-			}
-		}
-	}
-	// Otherwise recurse
-	else
-	{
-		Neighbors< _PLeftRadius + _PRightRadius + 1 > pNeighbors;
-		getNeighbors< CreateNodes , _PLeftRadius , _PRightRadius >( node->parent , pNeighbors , Initializer );
-
-		// Get the indices of the child node that would contain the point (and its antipode)
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-		// Iterate over the finer neighbors
-		// Here:
-		// (x,y,z) give the position of the finer nodes relative to the center,
-		// (_x,_y,_z) give a positive global position, up to an even offset, and
-		// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-		for( int z=-(int)_LeftRadius ; z<=(int)_RightRadius ; z++ )
-		{
-			int _z = (z+cz) + (_LeftRadius<<1) , pz = ( _z>>1 ) - _LeftRadius + _PLeftRadius , zz = z + _LeftRadius;
-			for( int y=-(int)_LeftRadius ; y<=(int)_RightRadius ; y++ )
-			{
-				int _y = (y+cy) + (_LeftRadius<<1) , py = ( _y>>1 ) - _LeftRadius + _PLeftRadius , yy = y + _LeftRadius;
-
-				int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-				for( int x=-(int)_LeftRadius ; x<=(int)_RightRadius ; x++ )
-				{
-					int _x = (x+cx) + (_LeftRadius<<1) , px = ( _x>>1 ) - _LeftRadius + _PLeftRadius , xx = x + _LeftRadius;
-					if( CreateNodes )
-					{
-						if( pNeighbors.neighbors[px][py][pz] )
-						{
-							if( !pNeighbors.neighbors[px][py][pz]->children ) pNeighbors.neighbors[px][py][pz]->initChildren( Initializer );
-							neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-						}
-						else neighbors.neighbors[xx][yy][zz] = NULL;
-					}
-					else
-					{
-						if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-							neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-						else neighbors.neighbors[xx][yy][zz] = NULL;
-					}
-				}
-			}
-		}
-	}
-}
-
-///////////////////////////////
-// OctNode::ConstNeighborKey //
-///////////////////////////////
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::ConstNeighborKey( void ){ _depth=-1 , neighbors=NULL; }
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::ConstNeighborKey( const ConstNeighborKey& key )
-{
-	_depth = 0 , neighbors = NULL;
-	set( key._depth );
-	for( int d=0 ; d<=_depth ; d++ ) memcpy( &neighbors[d] , &key.neighbors[d] , sizeof( ConstNeighbors< Width > ) );
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::~ConstNeighborKey( void )
-{
-	if( neighbors ) delete[] neighbors;
-	neighbors=NULL;
-}
-
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-void OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::set( int d )
-{
-	if( neighbors ) delete[] neighbors;
-	neighbors = NULL;
-	_depth = d;
-	if( d<0 ) return;
-	neighbors = new ConstNeighbors< Width >[d+1];
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-typename OctNode< NodeData >::template ConstNeighbors< LeftRadius+RightRadius+1 >& OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::getNeighbors( const OctNode< NodeData >* node )
-{
-	ConstNeighbors< Width >& neighbors = this->neighbors[ node->depth() ];
-	if( node!=neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius])
-	{
-		neighbors.clear();
-
-		if( !node->parent ) neighbors.neighbors[LeftRadius][LeftRadius][LeftRadius] = node;
-		else
-		{
-			ConstNeighbors< Width >& pNeighbors = getNeighbors( node->parent );
-
-			// Get the indices of the child node that would contain the point (and its antipode)
-			int cx , cy , cz;
-			Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-			// Iterate over the finer neighbors and set them (if you can)
-			// Here:
-			// (x,y,z) give the position of the finer nodes relative to the center,
-			// (_x,_y,_z) give a positive global position, up to an even offset, and
-			// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-			for( int z=-(int)LeftRadius ; z<=(int)RightRadius ; z++ )
-			{
-				int _z = (z+cz) + (LeftRadius<<1) , pz = ( _z>>1 ) , zz = z+LeftRadius;
-				for( int y=-(int)LeftRadius ; y<=(int)RightRadius ; y++ )
-				{
-					int _y = (y+cy) + (LeftRadius<<1) , py = ( _y>>1 ) , yy = y+LeftRadius;
-
-					int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-					for( int x=-(int)LeftRadius ; x<=(int)RightRadius ; x++ )
-					{
-						int _x = (x+cx) + (LeftRadius<<1) , px = ( _x>>1 ) , xx = x+LeftRadius;
-						if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-							neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-						else
-							neighbors.neighbors[xx][yy][zz] = NULL;
-					}
-				}
-			}
-		}
-	}
-	return neighbors;
-}
-template< class NodeData >
-template< unsigned int LeftRadius , unsigned int RightRadius >
-template< unsigned int _LeftRadius , unsigned int _RightRadius >
-void OctNode< NodeData >::ConstNeighborKey< LeftRadius , RightRadius >::getNeighbors( const OctNode< NodeData >* node , ConstNeighbors< _LeftRadius+_RightRadius+1 >& neighbors )
-{
-	neighbors.clear();
-	if( !node ) return;
-
-	// [WARNING] This estimate of the required radius is somewhat conservative if the readius is odd (depending on where the node is relative to its parent)
-	const unsigned int _PLeftRadius = (_LeftRadius+1)/2 , _PRightRadius = (_RightRadius+1)/2;
-	// If we are at the root of the tree, we are done
-	if( !node->parent ) neighbors.neighbors[_LeftRadius][_LeftRadius][_LeftRadius] = node;
-	// If we can get the data from the the key for the parent node, do that
-	else if( _PLeftRadius<=LeftRadius && _PRightRadius<=RightRadius )
-	{
-		getNeighbors( node->parent );
-		const ConstNeighbors< LeftRadius + RightRadius + 1 >& pNeighbors = this->neighbors[ node->depth()-1 ];
-		// Get the indices of the child node that would contain the point (and its antipode)
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-		// Iterate over the finer neighbors
-		// Here:
-		// (x,y,z) give the position of the finer nodes relative to the center,
-		// (_x,_y,_z) give a positive global position, up to an even offset, and
-		// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-		for( int z=-(int)_LeftRadius ; z<=(int)_RightRadius ; z++ )
-		{
-			int _z = (z+cz) + (_LeftRadius<<1) , pz = ( _z>>1 ) - _LeftRadius + LeftRadius , zz = z + _LeftRadius;
-			for( int y=-(int)_LeftRadius ; y<=(int)_RightRadius ; y++ )
-			{
-				int _y = (y+cy) + (_LeftRadius<<1) , py = ( _y>>1 ) - _LeftRadius + LeftRadius , yy = y + _LeftRadius;
-
-				int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-				for( int x=-(int)_LeftRadius ; x<=(int)_RightRadius ; x++ )
-				{
-					int _x = (x+cx) + (_LeftRadius<<1) , px = ( _x>>1 ) - _LeftRadius + LeftRadius , xx = x + _LeftRadius;
-					if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children ) 
-						neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-					else
-						neighbors.neighbors[xx][yy][zz] = NULL;
-				}
-			}
-		}
-	}
-	// Otherwise recurse
-	else
-	{
-		ConstNeighbors< _PLeftRadius + _PRightRadius + 1 > pNeighbors;
-		getNeighbors< _PLeftRadius , _PRightRadius >( node->parent , pNeighbors );
-
-		// Get the indices of the child node that would contain the point (and its antipode)
-		int cx , cy , cz;
-		Cube::FactorCornerIndex( (int)( node - node->parent->children ) , cx , cy , cz );
-
-
-		// Iterate over the finer neighbors
-		// Here:
-		// (x,y,z) give the position of the finer nodes relative to the center,
-		// (_x,_y,_z) give a positive global position, up to an even offset, and
-		// (px-LeftRadius,py-LeftRadius,pz-LeftRadius) give the positions of their parents relative to the parent of the center
-		for( int z=-(int)_LeftRadius ; z<=(int)_RightRadius ; z++ )
-		{
-			int _z = (z+cz) + (_LeftRadius<<1) , pz = ( _z>>1 ) - _LeftRadius + _PLeftRadius , zz = z + _LeftRadius;
-			for( int y=-(int)_LeftRadius ; y<=(int)_RightRadius ; y++ )
-			{
-				int _y = (y+cy) + (_LeftRadius<<1) , py = ( _y>>1 ) - _LeftRadius + _PLeftRadius , yy = y + _LeftRadius;
-
-				int cornerIndex = ( (_z&1)<<2 ) | ( (_y&1)<<1 );
-				for( int x=-(int)_LeftRadius ; x<=(int)_RightRadius ; x++ )
-				{
-					int _x = (x+cx) + (_LeftRadius<<1) , px = ( _x>>1 ) - _LeftRadius + _PLeftRadius , xx = x + _LeftRadius;
-
-					if( pNeighbors.neighbors[px][py][pz] && pNeighbors.neighbors[px][py][pz]->children )
-						neighbors.neighbors[xx][yy][zz] = pNeighbors.neighbors[px][py][pz]->children + ( cornerIndex | (_x&1) );
-					else
-						neighbors.neighbors[xx][yy][zz] = NULL;
-				}
-			}
-		}
-	}
-	return;
-}
-
-template< class NodeData >
-int OctNode< NodeData >::write(const char* fileName) const{
-	FILE* fp=fopen(fileName,"wb");
-	if(!fp){return 0;}
-	int ret=write(fp);
-	fclose(fp);
-	return ret;
-}
-template< class NodeData >
-int OctNode< NodeData >::write(FILE* fp) const{
-	fwrite(this,sizeof(OctNode< NodeData >),1,fp);
-	if(children){for(int i=0;i<Cube::CORNERS;i++){children[i].write(fp);}}
-	return 1;
-}
-template< class NodeData >
-int OctNode< NodeData >::read( const char* fileName , void (*Initializer)( OctNode& ) )
-{
-	FILE* fp = fopen( fileName , "rb" );
-	if( !fp ) return 0;
-	int ret = read( fp , Initializer );
-	fclose( fp );
-	return ret;
-}
-template< class NodeData >
-int OctNode< NodeData >::read( FILE* fp , void (*Initializer)( OctNode& ) )
-{
-	fread( this , sizeof( OctNode< NodeData > ) , 1 , fp );
-	parent = NULL;
-	if( children )
-	{
-		children=NULL;
-		initChildren( Initializer );
-		for( int i=0 ; i<Cube::CORNERS ; i++ ) children[i].read(fp) , children[i].parent=this;
-	}
-	return 1;
-}
-template< class NodeData >
-int OctNode< NodeData >::width(int maxDepth) const {
-	int d=depth();
-	return 1<<(maxDepth-d); 
-}
-template< class NodeData >
-void OctNode< NodeData >::centerIndex(int maxDepth,int index[DIMENSION]) const
-{
-	int d,o[3];
-	depthAndOffset(d,o);
-	for(int i=0;i<DIMENSION;i++) index[i]=BinaryNode::CornerIndex( maxDepth , d+1 , o[i]<<1 , 1 );
-}
diff --git a/Src/PNG.h b/Src/PNG.h
new file mode 100644
index 0000000..120fb66
--- /dev/null
+++ b/Src/PNG.h
@@ -0,0 +1,35 @@
+#ifndef PNG_INCLUDED
+#define PNG_INCLUDED
+
+#include "PNG/png.h"
+
+struct PNGReader : public ImageReader	// Row-by-row PNG reader built on libpng
+{
+	PNGReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );	// opens fileName and reports the header's width/height/channel count through the references
+	~PNGReader( void );
+	unsigned int nextRow( unsigned char* row );	// reads the next row into row and returns that row's index
+	static bool GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels );	// reads only the header; needs no PNGReader instance
+protected:
+	png_structp _png_ptr;	// libpng read state
+	png_infop _info_ptr;	// header information filled by png_read_info
+	png_infop _end_info ;	// second info struct, handed to png_destroy_read_struct
+	FILE* _fp;	// input stream given to png_init_io
+	unsigned char* _scratchRow;	// staging buffer for 16-bit rows; NULL when the image is 8 bits per channel
+	unsigned int _currentRow;	// index of the next row nextRow() will return
+};
+
+struct PNGWriter : public ImageWriter	// Row-by-row PNG writer built on libpng
+{
+	PNGWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , unsigned int quality=100 );	// NOTE(review): the constructor in PNG.inl does not read quality -- confirm it is kept only for interface parity
+	~PNGWriter( void );
+	unsigned int nextRow( const unsigned char* row );	// writes one row; returns the index that row had
+	unsigned int nextRows( const unsigned char* rows , unsigned int rowNum );	// writes rowNum consecutive rows; returns the updated row counter
+protected:
+	FILE* _fp;	// output stream given to png_init_io
+	png_structp _png_ptr;	// libpng write state
+	png_infop _info_ptr;	// header information passed to png_set_IHDR / png_write_info
+	unsigned int _currentRow;	// number of rows written so far
+};
+
+#include "PNG.inl"
+#endif //PNG_INCLUDED
diff --git a/Src/PNG.inl b/Src/PNG.inl
new file mode 100644
index 0000000..d024f78
--- /dev/null
+++ b/Src/PNG.inl
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <vector>
+#ifdef _WIN32
+#include "PNG/png.h"
+#else // !_WIN32
+#include <png.h>
+#endif // _WIN32
+
+inline PNGReader::PNGReader( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{	// Opens fileName, reads the PNG header, and reports the image dimensions through width/height/channels
+	_currentRow = 0;
+
+	_png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING , 0 , 0 , 0);
+	if( !_png_ptr ) ERROR_OUT( "Failed to create png pointer" );
+	_info_ptr = png_create_info_struct( _png_ptr );
+	if( !_info_ptr ) ERROR_OUT( "Failed to create info pointer" );
+
+	_end_info = png_create_info_struct( _png_ptr );
+	if( !_end_info ) ERROR_OUT( "Failed to create end pointer" );
+
+
+	_fp = fopen( fileName , "rb" );
+	if( !_fp ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
+	png_init_io( _png_ptr , _fp );
+
+	png_read_info( _png_ptr, _info_ptr );
+
+	width = png_get_image_width( _png_ptr , _info_ptr );
+	height = png_get_image_height( _png_ptr, _info_ptr );
+	channels = png_get_channels( _png_ptr , _info_ptr );	// NOTE(review): queried before png_set_expand is requested below -- confirm the count callers see for palette images
+	int bit_depth=png_get_bit_depth( _png_ptr , _info_ptr );
+	int color_type = png_get_color_type( _png_ptr , _info_ptr );
+	if( bit_depth==16 )
+	{
+		WARN( "Converting 16-bit image to 8-bit image" );
+		_scratchRow = new unsigned char[ channels*width*2 ];	// two bytes per sample; nextRow() copies one of them out
+	}
+	else
+	{
+		if( bit_depth!=8 ) ERROR_OUT( "Expected 8 bits per channel" );
+		_scratchRow = NULL;	// 8-bit rows are read straight into the caller's buffer
+	}
+	if( color_type==PNG_COLOR_TYPE_PALETTE ) png_set_expand( _png_ptr ) , printf( "Expanding PNG color pallette\n" );
+
+	{
+		long int a = 1;
+		int swap = (*((unsigned char *) &a) == 1);	// true when the lowest-addressed byte of a holds the 1, i.e. a little-endian host
+		if( swap ) png_set_swap( _png_ptr );	// requests libpng's byte swap of 16-bit samples on such hosts
+	}
+}
+inline unsigned int PNGReader::nextRow( unsigned char* row )
+{	// Reads the next image row into row (one byte per sample) and returns the index of the row just read
+	if( _scratchRow )	// non-NULL only when the constructor saw a 16-bit image
+	{
+		int width = png_get_image_width( _png_ptr , _info_ptr );
+		int channels = png_get_channels( _png_ptr , _info_ptr );
+
+		png_read_row( _png_ptr , (png_bytep)_scratchRow , NULL );
+#pragma omp parallel for
+		for( int i=0 ; i<width*channels ; i++ ) row[i] = _scratchRow[2*i];	// keeps the first byte of every two-byte sample
+	}
+	else png_read_row( _png_ptr , (png_bytep)row , NULL );
+	return _currentRow++;	// post-increment: the value returned is the row that was just read
+}
+
+inline PNGReader::~PNGReader( void )
+{	// Releases the 16-bit scratch row, the libpng read state, and the input file opened by the constructor
+	delete[] _scratchRow , _scratchRow = NULL;	// delete[] on NULL is a no-op, so no guard is needed
+	png_destroy_read_struct( &_png_ptr , &_info_ptr , &_end_info );
+	if( _fp ) fclose( _fp ) , _fp = NULL;	// close the stream after libpng is done with it, mirroring GetInfo()
+}
+
+inline bool PNGReader::GetInfo( const char* fileName , unsigned int& width , unsigned int& height , unsigned int& channels )
+{	// Reads just the PNG header of fileName and reports width/height/channels; every failure goes through ERROR_OUT
+	png_structp png_ptr;
+	png_infop info_ptr;
+	png_infop end_info ;
+	FILE* fp;
+
+	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING , 0 , 0 , 0);
+	if( !png_ptr ) ERROR_OUT( "Failed to create png pointer" );
+	info_ptr = png_create_info_struct( png_ptr );
+	if( !info_ptr ) ERROR_OUT( "Failed to create info pointer" );
+	end_info = png_create_info_struct( png_ptr );
+	if( !end_info ) ERROR_OUT( "Failed to create end pointer" );
+
+	fp = fopen( fileName , "rb" );
+	if( !fp ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
+	png_init_io( png_ptr , fp );
+
+	png_read_info( png_ptr, info_ptr );
+
+	width = png_get_image_width( png_ptr , info_ptr );
+	height = png_get_image_height( png_ptr, info_ptr );
+	channels = png_get_channels( png_ptr , info_ptr );
+
+	png_destroy_read_struct( &png_ptr , &info_ptr , &end_info );	// local libpng state and the file are both released before returning
+	fclose( fp );
+	return true;	// the only value this function returns
+}
+
+inline PNGWriter::PNGWriter( const char* fileName , unsigned int width , unsigned int height , unsigned int channels , unsigned int quality )
+{	// Opens fileName and writes the header of an 8-bit, non-interlaced PNG; quality is accepted but not consulted
+	_currentRow = 0;
+
+	_png_ptr = png_create_write_struct( PNG_LIBPNG_VER_STRING , 0 , 0 , 0 );
+	if( !_png_ptr )	ERROR_OUT( "Failed to create png write struct" );
+	_info_ptr = png_create_info_struct( _png_ptr );
+	if( !_info_ptr ) ERROR_OUT( "Failed to create png info struct");
+
+	_fp = fopen( fileName , "wb" );
+	if( !_fp ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
+	png_init_io( _png_ptr , _fp );
+
+	png_set_compression_level( _png_ptr , Z_BEST_SPEED );	// fixed zlib level, independent of quality
+
+	int pngColorType;
+	switch( channels )	// map the channel count onto the matching libpng colour type
+	{
+		case 1: pngColorType = PNG_COLOR_TYPE_GRAY ; break;
+		case 3: pngColorType = PNG_COLOR_TYPE_RGB  ; break;
+		case 4: pngColorType = PNG_COLOR_TYPE_RGBA ; break;
+		default: ERROR_OUT( "Only 1, 3, or 4 channel PNGs are supported" );
+	};
+	png_set_IHDR( _png_ptr , _info_ptr, width , height, 8 , pngColorType , PNG_INTERLACE_NONE , PNG_COMPRESSION_TYPE_DEFAULT , PNG_FILTER_TYPE_DEFAULT );
+	png_write_info( _png_ptr , _info_ptr );
+
+	{
+		long int a = 1;
+		int swap = (*((unsigned char *) &a) == 1);	// true when the lowest-addressed byte of a holds the 1, i.e. a little-endian host
+		if( swap ) png_set_swap( _png_ptr );
+	}
+}
+inline PNGWriter::~PNGWriter( void )
+{	// Finishes the PNG stream, frees the libpng write state, then closes the output file
+	png_write_end( _png_ptr , NULL );
+	png_destroy_write_struct( &_png_ptr , &_info_ptr );
+	fclose( _fp );
+}
+inline unsigned int PNGWriter::nextRow( const unsigned char* row )
+{	// Writes one row of samples and returns the index that row had (post-increment of _currentRow)
+	png_write_row( _png_ptr , (png_bytep)row );
+	return _currentRow++;
+}
+inline unsigned int PNGWriter::nextRows( const unsigned char* rows , unsigned int rowNum )
+{	// Writes rowNum tightly packed rows starting at rows and returns the updated row counter
+	for( size_t r=0 , rowBytes=png_get_rowbytes( _png_ptr , _info_ptr ) ; r<rowNum ; r++ ) png_write_row( _png_ptr , (png_bytep)( rows + r*rowBytes ) );	// stride comes from libpng, so 1- and 4-channel images work and the opaque png_struct is never dereferenced
+	return _currentRow += rowNum;
+}
diff --git a/Src/PPolynomial.inl b/Src/PPolynomial.inl
index a78ae58..be25aff 100644
--- a/Src/PPolynomial.inl
+++ b/Src/PPolynomial.inl
@@ -172,7 +172,7 @@ template<int Degree>
 double PPolynomial<Degree>::operator ()( double t ) const
 {
 	double v=0;
-	for( int i=0 ; i<int(polyCount) && t>polys[i].start ; i++ ) v += polys[i].p(t);
+	for( int i=0 ; i<int(polyCount) && t>polys[i].start ; i++ ) v+=polys[i].p(t);
 	return v;
 }
 
diff --git a/Src/Ply.h b/Src/Ply.h
index 699381f..1c99add 100644
--- a/Src/Ply.h
+++ b/Src/Ply.h
@@ -1,226 +1,42 @@
 /*
 
- Header for PLY polygon files.
- 
-  - Greg Turk, March 1994
-  
-   A PLY file contains a single polygonal _object_.
-   
-	An object is composed of lists of _elements_.  Typical elements are
-	vertices, faces, edges and materials.
-	
-	 Each type of element for a given object has one or more _properties_
-	 associated with the element type.  For instance, a vertex element may
-	 have as properties three floating-point values x,y,z and three unsigned
-	 chars for red, green and blue.
-	 
-	  ---------------------------------------------------------------
-	  
-	   Copyright (c) 1994 The Board of Trustees of The Leland Stanford
-	   Junior University.  All rights reserved.   
-	   
-		Permission to use, copy, modify and distribute this software and its   
-		documentation for any purpose is hereby granted without fee, provided   
-		that the above copyright notice and this permission notice appear in   
-		all copies of this software and that you do not sell the software.   
-		
-		 THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
-		 EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
-		 WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
-		 
-*/
+Header for PLY polygon files.
 
-#ifndef __PLY_H__
-#define __PLY_H__
+- Greg Turk, March 1994
 
-#define USE_PLY_WRAPPER 1
-
-#ifndef WIN32
-#define _strdup strdup
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-	
-#include <stdlib.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <string.h>
-    
-#define PLY_ASCII         1      /* ascii PLY file */
-#define PLY_BINARY_BE     2      /* binary PLY file, big endian */
-#define PLY_BINARY_LE     3      /* binary PLY file, little endian */
-#define PLY_BINARY_NATIVE 4      /* binary PLY file, same endianness as current architecture */
-    
-#define PLY_OKAY    0           /* ply routine worked okay */
-#define PLY_ERROR  -1           /* error in ply routine */
-	
-	/* scalar data types supported by PLY format */
-	
-#define PLY_START_TYPE 0
-#define PLY_CHAR       1
-#define PLY_SHORT      2
-#define PLY_INT        3
-#define PLY_UCHAR      4
-#define PLY_USHORT     5
-#define PLY_UINT       6
-#define PLY_FLOAT      7
-#define PLY_DOUBLE     8
-#define PLY_INT_8      9
-#define PLY_UINT_8     10
-#define PLY_INT_16     11
-#define PLY_UINT_16    12
-#define PLY_INT_32     13
-#define PLY_UINT_32    14
-#define PLY_FLOAT_32   15
-#define PLY_FLOAT_64   16
-	
-#define PLY_END_TYPE   17
-	
-#define  PLY_SCALAR  0
-#define  PLY_LIST    1
-	
-#define PLY_STRIP_COMMENT_HEADER 0
-
-typedef struct PlyProperty {    /* description of a property */
-	
-	char *name;                           /* property name */
-	int external_type;                    /* file's data type */
-	int internal_type;                    /* program's data type */
-	int offset;                           /* offset bytes of prop in a struct */
-	
-	int is_list;                          /* 1 = list, 0 = scalar */
-	int count_external;                   /* file's count type */
-	int count_internal;                   /* program's count type */
-	int count_offset;                     /* offset byte for list count */
-	
-} PlyProperty;
-
-typedef struct PlyElement {     /* description of an element */
-	char *name;                   /* element name */
-	int num;                      /* number of elements in this object */
-	int size;                     /* size of element (bytes) or -1 if variable */
-	int nprops;                   /* number of properties for this element */
-	PlyProperty **props;          /* list of properties in the file */
-	char *store_prop;             /* flags: property wanted by user? */
-	int other_offset;             /* offset to un-asked-for props, or -1 if none*/
-	int other_size;               /* size of other_props structure */
-} PlyElement;
-
-typedef struct PlyOtherProp {   /* describes other properties in an element */
-	char *name;                   /* element name */
-	int size;                     /* size of other_props */
-	int nprops;                   /* number of properties in other_props */
-	PlyProperty **props;          /* list of properties in other_props */
-} PlyOtherProp;
-
-typedef struct OtherData { /* for storing other_props for an other element */
-	void *other_props;
-} OtherData;
-
-typedef struct OtherElem {     /* data for one "other" element */
-	char *elem_name;             /* names of other elements */
-	int elem_count;              /* count of instances of each element */
-	OtherData **other_data;      /* actual property data for the elements */
-	PlyOtherProp *other_props;   /* description of the property data */
-} OtherElem;
-
-typedef struct PlyOtherElems {  /* "other" elements, not interpreted by user */
-	int num_elems;                /* number of other elements */
-	OtherElem *other_list;        /* list of data for other elements */
-} PlyOtherElems;
-
-typedef struct PlyFile {        /* description of PLY file */
-	FILE *fp;                     /* file pointer */
-	int file_type;                /* ascii or binary */
-	float version;                /* version number of file */
-	int nelems;                   /* number of elements of object */
-	PlyElement **elems;           /* list of elements */
-	int num_comments;             /* number of comments */
-	char **comments;              /* list of comments */
-	int num_obj_info;             /* number of items of object information */
-	char **obj_info;              /* list of object info items */
-	PlyElement *which_elem;       /* which element we're currently writing */
-	PlyOtherElems *other_elems;   /* "other" elements from a PLY file */
-} PlyFile;
-	
-	/* memory allocation */
-extern char *my_alloc();
-#define myalloc(mem_size) my_alloc((mem_size), __LINE__, __FILE__)
-
-#ifndef ALLOCN
-#define REALLOCN(PTR,TYPE,OLD_N,NEW_N)							\
-{										\
-	if ((OLD_N) == 0)                                           		\
-{   ALLOCN((PTR),TYPE,(NEW_N));}                            		\
-	else									\
-{								    		\
-	(PTR) = (TYPE *)realloc((PTR),(NEW_N)*sizeof(TYPE));			\
-	if (((PTR) == NULL) && ((NEW_N) != 0))					\
-{									\
-	fprintf(stderr, "Memory reallocation failed on line %d in %s\n", 	\
-	__LINE__, __FILE__);                             		\
-	fprintf(stderr, "  tried to reallocate %d->%d\n",       		\
-	(OLD_N), (NEW_N));                              		\
-	exit(-1);								\
-}									\
-	if ((NEW_N)>(OLD_N))							\
-	memset((char *)(PTR)+(OLD_N)*sizeof(TYPE), 0,			\
-	((NEW_N)-(OLD_N))*sizeof(TYPE));				\
-}										\
-}
+A PLY file contains a single polygonal _object_.
 
-#define  ALLOCN(PTR,TYPE,N) 					\
-{ (PTR) = (TYPE *) calloc(((unsigned)(N)),sizeof(TYPE));\
-	if ((PTR) == NULL) {    				\
-	fprintf(stderr, "Memory allocation failed on line %d in %s\n", \
-	__LINE__, __FILE__);                           \
-	exit(-1);                                             \
-	}							\
-}
+An object is composed of lists of _elements_.  Typical elements are
+vertices, faces, edges and materials.
 
+Each type of element for a given object has one or more _properties_
+associated with the element type.  For instance, a vertex element may
+have as properties three floating-point values x,y,z and three unsigned
+chars for red, green and blue.
+
+---------------------------------------------------------------
+
+Copyright (c) 1994 The Board of Trustees of The Leland Stanford
+Junior University.  All rights reserved.   
+
+Permission to use, copy, modify and distribute this software and its   
+documentation for any purpose is hereby granted without fee, provided   
+that the above copyright notice and this permission notice appear in   
+all copies of this software and that you do not sell the software.   
+
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
+
+*/
+
+
+#ifndef __PLY_H__
+#define __PLY_H__
 
-#define FREE(PTR)  { free((PTR)); (PTR) = NULL; }
-#endif
-
-
-/*** delcaration of routines ***/
-
-extern PlyFile *ply_write(FILE *, int, const char **, int);
-extern PlyFile *ply_open_for_writing(char *, int, const char **, int, float *);
-extern void ply_describe_element(PlyFile *, char *, int, int, PlyProperty *);
-extern void ply_describe_property(PlyFile *, const char *, PlyProperty *);
-extern void ply_element_count(PlyFile *, const char *, int);
-extern void ply_header_complete(PlyFile *);
-extern void ply_put_element_setup(PlyFile *, const char *);
-extern void ply_put_element(PlyFile *, void *);
-extern void ply_put_comment(PlyFile *, char *);
-extern void ply_put_obj_info(PlyFile *, char *);
-extern PlyFile *ply_read(FILE *, int *, char ***);
-extern PlyFile *ply_open_for_reading( char *, int *, char ***, int *, float *);
-extern PlyProperty **ply_get_element_description(PlyFile *, char *, int*, int*);
-extern void ply_get_element_setup( PlyFile *, char *, int, PlyProperty *);
-extern int ply_get_property(PlyFile *, char *, PlyProperty *);
-extern PlyOtherProp *ply_get_other_properties(PlyFile *, char *, int);
-extern void ply_get_element(PlyFile *, void *);
-extern char **ply_get_comments(PlyFile *, int *);
-extern char **ply_get_obj_info(PlyFile *, int *);
-extern void ply_close(PlyFile *);
-extern void ply_get_info(PlyFile *, float *, int *);
-extern PlyOtherElems *ply_get_other_element (PlyFile *, char *, int);
-extern void ply_describe_other_elements ( PlyFile *, PlyOtherElems *);
-extern void ply_put_other_elements (PlyFile *);
-extern void ply_free_other_elements (PlyOtherElems *);
-extern void ply_describe_other_properties(PlyFile *, PlyOtherProp *, int);
-
-extern int equal_strings(const char *, const char *);
-
-#ifdef __cplusplus
-}
-#endif
-#include "Geometry.h"
 #include <vector>
+#include "PlyFile.h"
+#include "Geometry.h"
 
 template< class Real > int PLYType( void );
 template<> inline int PLYType< int           >( void ){ return PLY_INT   ; }
@@ -228,7 +44,7 @@ template<> inline int PLYType<          char >( void ){ return PLY_CHAR  ; }
 template<> inline int PLYType< unsigned char >( void ){ return PLY_UCHAR ; }
 template<> inline int PLYType<        float  >( void ){ return PLY_FLOAT ; }
 template<> inline int PLYType<        double >( void ){ return PLY_DOUBLE; }
-template< class Real > inline int PLYType( void ){ fprintf( stderr , "[ERROR] Unrecognized type\n" ) , exit( 0 ); }
+template< class Real > inline int PLYType( void ){ ERROR_OUT( "Unrecognized type" ); } // fallback for types without a specialization above; there is no return statement, so this presumably relies on ERROR_OUT never returning -- TODO confirm
 
 typedef struct PlyFace
 {
@@ -238,417 +54,275 @@ typedef struct PlyFace
 } PlyFace;
 static PlyProperty face_props[] =
 {
-	{ _strdup( "vertex_indices" ) , PLY_INT , PLY_INT , offsetof( PlyFace , vertices ) , 1 , PLY_UCHAR, PLY_UCHAR , offsetof(PlyFace,nr_vertices) },
+	PlyProperty( "vertex_indices" , PLY_INT , PLY_INT , offsetof( PlyFace , vertices ) , 1 , PLY_UCHAR, PLY_UCHAR , offsetof( PlyFace , nr_vertices ) ) ,
 };
 
 
-///////////////////
-// PlyVertexType //
-///////////////////
-
-// The "Wrapper" class indicates the class to cast to/from in order to support linear operations.
-template< class Real >
+struct RGBColor // Three unsigned-char color channels with indexed access
+{
+	unsigned char c[3]; // channel values, addressed by index 0..2
+	unsigned char& operator[]( int idx )       { return c[idx]; } // no bounds checking is performed on idx
+	unsigned char  operator[]( int idx ) const { return c[idx]; }
+	RGBColor( void ){ c[0] = c[1] = c[2] = 0; } // all channels start at zero
+	RGBColor( const RGBColor& rgb ){ c[0] = rgb.c[0] , c[1] = rgb.c[1] , c[2] = rgb.c[2]; } // element-wise copy, no <string.h> needed
+	RGBColor& operator = ( const RGBColor& rgb ){ c[0] = rgb.c[0] , c[1] = rgb.c[1] , c[2] = rgb.c[2] ; return *this; } // element-wise copy stays well defined on self-assignment, unlike memcpy over identical ranges
+};
+///////////////
+// PlyVertex //
+///////////////
+template< typename _Real , int Dim , typename _RealOnDisk=float >
 class PlyVertex
 {
 public:
-	typedef PlyVertex Wrapper;
-
-	const static int ReadComponents=3;
-	const static int WriteComponents=3;
-	static PlyProperty ReadProperties[];
-	static PlyProperty WriteProperties[];
-
-	Point3D< Real > point;
-
-	PlyVertex( void ) { ; }
-	PlyVertex( Point3D< Real > p ) { point=p; }
-	PlyVertex operator + ( PlyVertex p ) const { return PlyVertex( point+p.point ); }
-	PlyVertex operator - ( PlyVertex p ) const { return PlyVertex( point-p.point ); }
-	template< class _Real > PlyVertex operator * ( _Real s ) const { return PlyVertex( point*s ); }
-	template< class _Real > PlyVertex operator / ( _Real s ) const { return PlyVertex( point/s ); }
-	PlyVertex& operator += ( PlyVertex p ) { point += p.point ; return *this; }
-	PlyVertex& operator -= ( PlyVertex p ) { point -= p.point ; return *this; }
-	template< class _Real > PlyVertex& operator *= ( _Real s ) { point *= s ; return *this; }
-	template< class _Real > PlyVertex& operator /= ( _Real s ) { point /= s ; return *this; }
+	typedef _Real Real;
+
+	PlyVertex& operator += ( const PlyVertex& p ){ point += p.point ; return *this; }
+	PlyVertex& operator -= ( const PlyVertex& p ){ point -= p.point ; return *this; }
+	PlyVertex& operator *= ( Real s )            { point *= s ; return *this; }
+	PlyVertex& operator /= ( Real s )            { point /= s ; return *this; }
+	PlyVertex  operator +  ( const PlyVertex& p ) const { return PlyVertex( point + p.point ); }
+	PlyVertex  operator -  ( const PlyVertex& p ) const { return PlyVertex( point - p.point ); }
+	PlyVertex  operator *  ( Real s )             const { return PlyVertex( point * s ); }
+	PlyVertex  operator /  ( Real s )             const { return PlyVertex( point / s ); }
+
+	const static int PlyReadNum = Dim;
+	const static int PlyWriteNum = Dim;
+
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+
+	Point< Real , Dim > point;
+	PlyVertex( void ) {}
+	PlyVertex( Point< Real , Dim > p ) : point(p) { }
+
+	struct Transform // Functor that maps a vertex through a stored XForm< Real , Dim+1 >
+	{
+		Transform( void ){}
+		Transform( const XForm< Real , Dim+1 >& xForm ) : _pointXForm(xForm) { }
+		PlyVertex operator() ( const PlyVertex& p ) const // returns a new vertex; the argument is left unmodified
+		{
+			PlyVertex transformed;
+			transformed.point = _pointXForm * p.point;
+			return transformed;
+		}
+	protected:
+		XForm< Real , Dim+1 > _pointXForm; // transform applied to the vertex position
+	};
+
+protected:
+	static const PlyProperty _PlyProperties[];
 };
-template< class Real , class _Real > PlyVertex< Real > operator * ( XForm4x4< _Real > xForm , PlyVertex< Real > v ) { return PlyVertex< Real >( xForm * v.point ); }
-template< class Real > PlyProperty PlyVertex< Real >::ReadProperties[]=
+
+template<>
+const PlyProperty PlyVertex< float , 2 , float >::_PlyProperties[] =
 {
-	{ _strdup( "x" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real > PlyProperty PlyVertex< Real >::WriteProperties[]=
+template<>
+const PlyProperty PlyVertex< double , 2 , float >::_PlyProperties[] =
 {
-	{ _strdup( "x" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real >
-class PlyValueVertex
+template<>
+const PlyProperty PlyVertex< float , 2 , double >::_PlyProperties[] =
 {
-public:
-	typedef PlyValueVertex Wrapper;
-
-	const static int ReadComponents=4;
-	const static int WriteComponents=4;
-	static PlyProperty ReadProperties[];
-	static PlyProperty WriteProperties[];
-
-	Point3D<Real> point;
-	Real value;
-
-	PlyValueVertex( void ) : value( Real(0) ) { ; }
-	PlyValueVertex( Point3D< Real > p , Real v ) : point(p) , value(v) { ; }
-	PlyValueVertex operator + ( PlyValueVertex p ) const { return PlyValueVertex( point+p.point , value+p.value ); }
-	PlyValueVertex operator - ( PlyValueVertex p ) const { return PlyValueVertex( point-p.value , value-p.value ); }
-	template< class _Real > PlyValueVertex operator * ( _Real s ) const { return PlyValueVertex( point*s , Real(value*s) ); }
-	template< class _Real > PlyValueVertex operator / ( _Real s ) const { return PlyValueVertex( point/s , Real(value/s) ); }
-	PlyValueVertex& operator += ( PlyValueVertex p ) { point += p.point , value += p.value ; return *this; }
-	PlyValueVertex& operator -= ( PlyValueVertex p ) { point -= p.point , value -= p.value ; return *this; }
-	template< class _Real > PlyValueVertex& operator *= ( _Real s ) { point *= s , value *= Real(s) ; return *this; }
-	template< class _Real > PlyValueVertex& operator /= ( _Real s ) { point /= s , value /= Real(s) ; return *this; }
+	PlyProperty( "x" , PLY_DOUBLE , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_DOUBLE , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real , class _Real > PlyValueVertex< Real > operator * ( XForm4x4< _Real > xForm , PlyValueVertex< Real > v ) { return PlyValueVertex< Real >( xForm * v.point , v.value ); }
-template< class Real > PlyProperty PlyValueVertex< Real >::ReadProperties[]=
+template<>
+const PlyProperty PlyVertex< double , 2 , double >::_PlyProperties[] =
 {
-	{ _strdup( "x"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "value" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , value           ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_DOUBLE , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_DOUBLE , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real > PlyProperty PlyValueVertex< Real >::WriteProperties[]=
+
+template<>
+const PlyProperty PlyVertex< float , 3 , float >::_PlyProperties[] =
 {
-	{ _strdup( "x"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"     ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "value" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyValueVertex , value           ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real >
-class PlyOrientedVertex
+template<>
+const PlyProperty PlyVertex< double , 3 , float >::_PlyProperties[] =
 {
-public:
-	typedef PlyOrientedVertex Wrapper;
-
-	const static int ReadComponents=6;
-	const static int WriteComponents=6;
-	static PlyProperty ReadProperties[];
-	static PlyProperty WriteProperties[];
-
-	Point3D<Real> point , normal;
-
-	PlyOrientedVertex( void ) { ; }
-	PlyOrientedVertex( Point3D< Real > p , Point3D< Real > n ) : point(p) , normal(n) { ; }
-  	PlyOrientedVertex operator + ( PlyOrientedVertex p ) const { return PlyOrientedVertex( point+p.point , normal+p.normal ); }
-	PlyOrientedVertex operator - ( PlyOrientedVertex p ) const { return PlyOrientedVertex( point-p.value , normal-p.normal ); }
-	template< class _Real > PlyOrientedVertex operator * ( _Real s ) const { return PlyOrientedVertex( point*s , normal*s ); }
-	template< class _Real > PlyOrientedVertex operator / ( _Real s ) const { return PlyOrientedVertex( point/s , normal/s ); }
-	PlyOrientedVertex& operator += ( PlyOrientedVertex p ) { point += p.point , normal += p.normal ; return *this; }
-	PlyOrientedVertex& operator -= ( PlyOrientedVertex p ) { point -= p.point , normal -= p.normal ; return *this; }
-	template< class _Real > PlyOrientedVertex& operator *= ( _Real s ) { point *= s , normal *= s ; return *this; }
-	template< class _Real > PlyOrientedVertex& operator /= ( _Real s ) { point /= s , normal /= s ; return *this; }
+	PlyProperty( "x" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real , class _Real > PlyOrientedVertex< Real > operator * ( XForm4x4< _Real > xForm , PlyOrientedVertex< Real > v ) { return PlyOrientedVertex< Real >( xForm * v.point , xForm.inverse().transpose() * v.normal ); }
-template< class Real > PlyProperty PlyOrientedVertex< Real >::ReadProperties[]=
+template<>
+const PlyProperty PlyVertex< float , 3 , double >::_PlyProperties[] =
 {
-	{ _strdup( "x"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "nx" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "ny" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "nz" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[2] ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_DOUBLE , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_DOUBLE , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_DOUBLE , PLY_FLOAT , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real > PlyProperty PlyOrientedVertex< Real >::WriteProperties[]=
+template<>
+const PlyProperty PlyVertex< double , 3 , double >::_PlyProperties[] =
 {
-	{ _strdup( "x"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"  ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex ,  point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "nx" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "ny" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "nz" ) , PLYType< Real >() , PLYType< Real >() , int( offsetof( PlyOrientedVertex , normal.coords[2] ) ) , 0 , 0 , 0 , 0 }
+	PlyProperty( "x" , PLY_DOUBLE , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_DOUBLE , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_DOUBLE , PLY_DOUBLE , int( offsetof( PlyVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
 };
-template< class Real >
-class PlyColorVertex
+
+///////////////////////
+// PlyVertexWithData //
+///////////////////////
+template< typename _Real , int Dim , typename Data , typename _RealOnDisk=float >
+class PlyVertexWithData
 {
 public:
-	struct _PlyColorVertex
-	{
-		Point3D< Real > point , color;
-		_PlyColorVertex( void ) { ; }
-		_PlyColorVertex( Point3D< Real > p , Point3D< Real > c ) : point(p) , color(c) { ; }
-		_PlyColorVertex( PlyColorVertex< Real > p ){ point = p.point ; for( int c=0 ; c<3 ; c++ ) color[c] = (Real) p.color[c]; }
-		operator PlyColorVertex< Real > ()
-		{
-			PlyColorVertex< Real > p;
-			p.point = point;
-			for( int c=0 ; c<3 ; c++ ) p.color[c] = (unsigned char)std::max< int >( 0 , std::min< int >( 255 , (int)( color[c]+0.5 ) ) );
-			return p;
-		}
+	typedef _Real Real;
 
-	  	_PlyColorVertex operator + ( _PlyColorVertex p ) const { return _PlyColorVertex( point+p.point , color+p.color ); }
-		_PlyColorVertex operator - ( _PlyColorVertex p ) const { return _PlyColorVertex( point-p.value , color-p.color ); }
-		template< class _Real > _PlyColorVertex operator * ( _Real s ) const { return _PlyColorVertex( point*s , color*s ); }
-		template< class _Real > _PlyColorVertex operator / ( _Real s ) const { return _PlyColorVertex( point/s , color/s ); }
-		_PlyColorVertex& operator += ( _PlyColorVertex p ) { point += p.point , color += p.color ; return *this; }
-		_PlyColorVertex& operator -= ( _PlyColorVertex p ) { point -= p.point , color -= p.color ; return *this; }
-		template< class _Real > _PlyColorVertex& operator *= ( _Real s ) { point *= s , color *= s ; return *this; }
-		template< class _Real > _PlyColorVertex& operator /= ( _Real s ) { point /= s , color /= s ; return *this; }
-	};
+	PlyVertexWithData& operator += ( const PlyVertexWithData& p ){ point += p.point , data += p.data ; return *this; }
+	PlyVertexWithData& operator -= ( const PlyVertexWithData& p ){ point -= p.point , data -= p.data ; return *this; }
+	PlyVertexWithData& operator *= ( Real s )                    { point *= s , data *= s ; return *this; }
+	PlyVertexWithData& operator /= ( Real s )                    { point /= s , data /= s ; return *this; }
+	PlyVertexWithData  operator +  ( const PlyVertexWithData& p ) const { return PlyVertexWithData( point + p.point , data + p.data ); }
+	PlyVertexWithData  operator -  ( const PlyVertexWithData& p ) const { return PlyVertexWithData( point - p.point , data - p.data ); }
+	PlyVertexWithData  operator *  ( Real s )                     const { return PlyVertexWithData( point * s , data * s ); }
+	PlyVertexWithData  operator /  ( Real s )                     const { return PlyVertexWithData( point / s , data / s ); }
 
-	typedef _PlyColorVertex Wrapper;
+	const static int PlyReadNum = Data::PlyReadNum + Dim;
+	const static int PlyWriteNum = Data::PlyWriteNum + Dim;
 
-	const static int ReadComponents=9;
-	const static int WriteComponents=6;
-	static PlyProperty ReadProperties[];
-	static PlyProperty WriteProperties[];
+	static const PlyProperty* PlyReadProperties( void ){ _SetReadProperties() ; return _PlyReadProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ _SetWriteProperties() ; return _PlyWriteProperties; }
 
-	Point3D< Real > point;
-	unsigned char color[3];
 
-	operator Point3D< Real >& (){ return point; }
-	operator const Point3D< Real >& () const { return point; }
-	PlyColorVertex( void ) { point.coords[0] = point.coords[1] = point.coords[2] = 0 , color[0] = color[1] = color[2] = 0; }
-	PlyColorVertex( const Point3D<Real>& p ) { point=p; }
-	PlyColorVertex( const Point3D< Real >& p , const unsigned char c[3] ) { point = p , color[0] = c[0] , color[1] = c[1] , color[2] = c[2]; }
-};
-template< class Real , class _Real > PlyColorVertex< Real > operator * ( XForm4x4< _Real > xForm , PlyColorVertex< Real > v ) { return PlyColorVertex< Real >( xForm * v.point , v.color ); }
+	Point< Real , Dim > point;
+	Data data;
+	PlyVertexWithData( void ) {}
+	PlyVertexWithData( Point< Real , Dim > p , Data d ) : point(p) , data(d) { }
 
-template< class Real > PlyProperty PlyColorVertex< Real >::ReadProperties[]=
-{
-	{ _strdup( "x"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "red"   ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "green" ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "blue"  ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "r"     ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "g"     ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "b"     ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 }
-};
-template< class Real > PlyProperty PlyColorVertex< Real >::WriteProperties[]=
-{
-	{ _strdup( "x"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "y"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "z"     ) , PLYType<          Real >() , PLYType<          Real >(), int( offsetof( PlyColorVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "red"   ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "green" ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 },
-	{ _strdup( "blue"  ) , PLYType< unsigned char >() , PLYType< unsigned char >(), int( offsetof( PlyColorVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 }
-};
-template< class Real >
-class PlyColorAndValueVertex
-{
-public:
-	struct _PlyColorAndValueVertex
+	struct Transform
 	{
-		Point3D< Real > point , color;
-		Real value;
-		_PlyColorAndValueVertex( void ) : value(0) { ; }
-		_PlyColorAndValueVertex( Point3D< Real > p , Point3D< Real > c , Real v ) : point(p) , color(c) , value(v) { ; }
-		_PlyColorAndValueVertex( PlyColorAndValueVertex< Real > p ){ point = p.point ; for( int c=0 ; c<3 ; c++ ) color[c] = (Real) p.color[c] ; value = p.value; }
-		operator PlyColorAndValueVertex< Real > ()
+		Transform( void ){}
+		Transform( const XForm< Real , Dim+1 >& xForm ) : _pointXForm(xForm) , _dataXForm(xForm) { }
+		PlyVertexWithData operator() ( const PlyVertexWithData& p ) const
 		{
-			PlyColorAndValueVertex< Real > p;
-			p.point = point;
-			for( int c=0 ; c<3 ; c++ ) p.color[c] = (unsigned char)std::max< int >( 0 , std::min< int >( 255 , (int)( color[c]+0.5 ) ) );
-			p.value = value;
-			return p;
+			PlyVertexWithData _p;
+			_p.point = _pointXForm * p.point;
+			_p.data = _dataXForm( p.data );
+			return _p;
 		}
-
-	  	_PlyColorAndValueVertex operator + ( _PlyColorAndValueVertex p ) const { return _PlyColorAndValueVertex( point+p.point , color+p.color , value+p.value ); }
-		_PlyColorAndValueVertex operator - ( _PlyColorAndValueVertex p ) const { return _PlyColorAndValueVertex( point-p.value , color-p.color , value+p.value ); }
-		template< class _Real > _PlyColorAndValueVertex operator * ( _Real s ) const { return _PlyColorAndValueVertex( point*s , color*s , value*s ); }
-		template< class _Real > _PlyColorAndValueVertex operator / ( _Real s ) const { return _PlyColorAndValueVertex( point/s , color/s , value/s ); }
-		_PlyColorAndValueVertex& operator += ( _PlyColorAndValueVertex p ) { point += p.point , color += p.color , value += p.value ; return *this; }
-		_PlyColorAndValueVertex& operator -= ( _PlyColorAndValueVertex p ) { point -= p.point , color -= p.color , value -= p.value ; return *this; }
-		template< class _Real > _PlyColorAndValueVertex& operator *= ( _Real s ) { point *= s , color *= s , value *= (Real)s ; return *this; }
-		template< class _Real > _PlyColorAndValueVertex& operator /= ( _Real s ) { point /= s , color /= s , value /= (Real)s ; return *this; }
+	protected:
+		XForm< Real , Dim+1 > _pointXForm;
+		typename Data::Transform _dataXForm;
 	};
 
-	typedef _PlyColorAndValueVertex Wrapper;
-
-	const static int ReadComponents=10;
-	const static int WriteComponents=7;
-	static PlyProperty ReadProperties[];
-	static PlyProperty WriteProperties[];
-
-	Point3D< Real > point;
-	unsigned char color[3];
-	Real value;
-
-	operator Point3D< Real >& (){ return point; }
-	operator const Point3D< Real >& () const { return point; }
-	PlyColorAndValueVertex( void ) { point.coords[0] = point.coords[1] = point.coords[2] = (Real)0 , color[0] = color[1] = color[2] = 0 , value = (Real)0; }
-	PlyColorAndValueVertex( const Point3D< Real >& p ) { point=p; }
-	PlyColorAndValueVertex( const Point3D< Real >& p , const unsigned char c[3] , Real v) { point = p , color[0] = c[0] , color[1] = c[1] , color[2] = c[2] , value = v; }
+protected:
+	static void _SetReadProperties( void );
+	static void _SetWriteProperties( void );
+	static PlyProperty _PlyReadProperties[];
+	static PlyProperty _PlyWriteProperties[];
 };
-template< class Real , class _Real > PlyColorAndValueVertex< Real > operator * ( XForm4x4< _Real > xForm , PlyColorAndValueVertex< Real > v ) { return PlyColorAndValueVertex< Real >( xForm * v.point , v.color , v.value ); }
-template< class Real > PlyProperty PlyColorAndValueVertex< Real >::ReadProperties[]=
+template< typename Real , int Dim , typename Data , typename RealOnDisk > PlyProperty PlyVertexWithData< Real , Dim , Data , RealOnDisk >::_PlyReadProperties[ PlyReadNum ];
+template< typename Real , int Dim , typename Data , typename RealOnDisk > PlyProperty PlyVertexWithData< Real , Dim , Data , RealOnDisk >::_PlyWriteProperties[ PlyWriteNum ];
+template< typename Real , int Dim , typename Data , typename RealOnDisk >
+void PlyVertexWithData< Real , Dim , Data , RealOnDisk >::_SetReadProperties( void )
 {
-	{ _strdup( "x"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "y"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "z"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "value" ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex ,        value    ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "red"   ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "green" ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "blue"  ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "r"     ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "g"     ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "b"     ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 }
-};
-template< class Real > PlyProperty PlyColorAndValueVertex< Real >::WriteProperties[]=
+	{
+		const PlyProperty * ReadProps = PlyVertex< Real , Dim , RealOnDisk >::PlyReadProperties();
+		for( int d=0 ; d<PlyVertex< Real , Dim , RealOnDisk >::PlyReadNum ; d++ ) _PlyReadProperties[d] = ReadProps[d];
+	}
+	{
+		const PlyProperty * ReadProps = Data::PlyReadProperties();
+		for( int d=0 ; d<Data::PlyReadNum ; d++ )
+		{
+			_PlyReadProperties[d+PlyVertex< Real , Dim , RealOnDisk >::PlyReadNum ] = ReadProps[d];
+			_PlyReadProperties[d+PlyVertex< Real , Dim , RealOnDisk >::PlyReadNum ].offset += (int)offsetof( PlyVertexWithData , data );
+		}
+	}
+}
+template< typename Real , int Dim , typename Data , typename RealOnDisk >
+void PlyVertexWithData< Real , Dim , Data , RealOnDisk >::_SetWriteProperties( void )
 {
-	{ _strdup( "x"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "y"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "z"     ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex , point.coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "value" ) , PLYType<          Real >() , PLYType<          Real >() , int( offsetof( PlyColorAndValueVertex ,        value    ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "red"   ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "green" ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ _strdup( "blue"  ) , PLYType< unsigned char >() , PLYType< unsigned char >() , int( offsetof( PlyColorAndValueVertex ,        color[2] ) ) , 0 , 0 , 0 , 0 }
-};
+	{
+		const PlyProperty * WriteProps = PlyVertex< Real , Dim , RealOnDisk >::PlyWriteProperties();
+		for( int d=0 ; d<PlyVertex< Real , Dim , RealOnDisk >::PlyWriteNum ; d++ ) _PlyWriteProperties[d] = WriteProps[d];
+	}
+	{
+		const PlyProperty * WriteProps = Data::PlyWriteProperties();
+		for( int d=0 ; d<Data::PlyWriteNum ; d++ )
+		{
+			_PlyWriteProperties[d+PlyVertex< Real , Dim , RealOnDisk >::PlyWriteNum ] = WriteProps[d];
+			_PlyWriteProperties[d+PlyVertex< Real , Dim , RealOnDisk >::PlyWriteNum ].offset += (int)offsetof( PlyVertexWithData , data );
+		}
+	}
+}
 
-template< class Vertex , class Real >
-int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >*  mesh , int file_type , const Point3D< float >& translate , float scale , char** comments=NULL , int commentNum=0 , XForm4x4< Real > xForm=XForm4x4< Real >::Identity() );
+template< class Vertex , class Real , int Dim >
+int PlyWritePolygons( const char* fileName , CoredMeshData< Vertex >*  mesh , int file_type , const Point< float , Dim >& translate , float scale , const std::vector< std::string >& comments , XForm< Real , Dim+1 > xForm=XForm< Real , Dim+1 >::Identity() );
 
-template< class Vertex , class Real >
-int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >*  mesh , int file_type , char** comments=NULL , int commentNum=0 , XForm4x4< Real > xForm=XForm4x4< Real >::Identity() );
+template< class Vertex , class Real , int Dim >
+int PlyWritePolygons( const char* fileName , CoredMeshData< Vertex >*  mesh , int file_type , const std::vector< std::string >& comments , XForm< Real , Dim+1 > xForm=XForm< Real , Dim+1 >::Identity() );
 
-inline bool PlyReadHeader( char* fileName , PlyProperty* properties , int propertyNum , bool* readFlags , int& file_type )
+inline bool PlyReadHeader( char* fileName , const PlyProperty* properties , int propertyNum , bool* readFlags , int& file_type )
 {
-	int nr_elems;
-	char **elist;
+	std::vector< std::string > elist;
 	float version;
-	PlyFile* ply;
-	char* elem_name;
-	int num_elems;
-	int nr_props;
-	PlyProperty** plist;
 
-	ply = ply_open_for_reading( fileName , &nr_elems , &elist , &file_type , &version );
+	PlyFile *ply = PlyFile::Read( fileName , elist , file_type , version );
 	if( !ply ) return false;
 
-	for( int i=0 ; i<nr_elems ; i++ )
-	{
-		elem_name = elist[i];
-		plist = ply_get_element_description( ply , elem_name , &num_elems , &nr_props );
-		if( !plist )
-		{
-			for( int i=0 ; i<nr_elems ; i++ )
-			{
-				free( ply->elems[i]->name );
-				free( ply->elems[i]->store_prop );
-				for( int j=0 ; j<ply->elems[i]->nprops ; j++ )
-				{
-					free( ply->elems[i]->props[j]->name );
-					free( ply->elems[i]->props[j] );
-				}
-				free( ply->elems[i]->props );
-			}
-			for( int i=0 ; i<nr_elems ; i++ ) free( ply->elems[i] );
-			free( ply->elems );
-			for( int i=0 ; i<ply->num_comments ; i++ ) free( ply->comments[i] );
-			free( ply->comments );
-			for( int i=0 ; i<ply->num_obj_info ; i++ ) free( ply->obj_info[i] );
-			free( ply->obj_info );
-			ply_free_other_elements( ply->other_elems );
-			
-			for( int i=0 ; i<nr_elems ; i++ ) free( elist[i] );
-			free( elist );
-			ply_close( ply );
-			return 0;
-		}		
-		if( equal_strings( "vertex" , elem_name ) )
-			for( int i=0 ; i<propertyNum ; i++ )
-				if( readFlags ) readFlags[i] = ply_get_property( ply , elem_name , &properties[i] )!=0;
+	for( int i=0 ; i<elist.size() ; i++ ) if( elist[i]=="vertex" ) for( int j=0 ; j<propertyNum ; j++ ) if( readFlags ) readFlags[j] = ply->get_property( elist[i].c_str() , &properties[j] )!=0;
 
-		for( int j=0 ; j<nr_props ; j++ )
-		{
-			free( plist[j]->name );
-			free( plist[j] );
-		}
-		free( plist );
-	}  // for each type of element
-	
-	for( int i=0 ; i<nr_elems ; i++ )
-	{
-		free( ply->elems[i]->name );
-		free( ply->elems[i]->store_prop );
-		for( int j=0 ; j<ply->elems[i]->nprops ; j++ )
-		{
-			free( ply->elems[i]->props[j]->name );
-			free( ply->elems[i]->props[j] );
-		}
-		if( ply->elems[i]->props && ply->elems[i]->nprops ) free(ply->elems[i]->props);
-	}
-	for( int i=0 ; i<nr_elems ; i++ ) free(ply->elems[i]);
-	free( ply->elems) ;
-	for( int i=0 ; i<ply->num_comments ; i++ ) free( ply->comments[i] );
-	free( ply->comments );
-	for( int i=0 ; i<ply->num_obj_info ; i++ ) free( ply->obj_info[i] );
-	free( ply->obj_info );
-	ply_free_other_elements(ply->other_elems);
-	
-	
-	for( int i=0 ; i<nr_elems ; i++ ) free( elist[i] );
-	free( elist );
-	ply_close( ply );
+	delete ply;
 	return true;
 }
-inline bool PlyReadHeader( char* fileName , PlyProperty* properties , int propertyNum , bool* readFlags )
+inline bool PlyReadHeader( char* fileName , const PlyProperty* properties , int propertyNum , bool* readFlags )
 {
 	int file_type;
 	return PlyReadHeader( fileName , properties , propertyNum , readFlags , file_type );
 }
 
 
-template<class Vertex>
-int PlyReadPolygons(char* fileName,
-					std::vector<Vertex>& vertices,std::vector<std::vector<int> >& polygons,
-					PlyProperty* properties,int propertyNum,
-					int& file_type,
-					char*** comments=NULL,int* commentNum=NULL , bool* readFlags=NULL );
+template< class Vertex >
+int PlyReadPolygons( const char* fileName,
+	std::vector< Vertex >& vertices , std::vector<std::vector<int> >& polygons ,
+	const PlyProperty* properties , int propertyNum ,
+	int& file_type ,
+	std::vector< std::string > &comments , bool* readFlags=NULL );
 
 template<class Vertex>
-int PlyWritePolygons(char* fileName,
-					 const std::vector<Vertex>& vertices,const std::vector<std::vector<int> >& polygons,
-					 PlyProperty* properties,int propertyNum,
-					 int file_type,
-					 char** comments=NULL,const int& commentNum=0);
-
-template<class Vertex>
-int PlyWritePolygons(char* fileName,
-					 const std::vector<Vertex>& vertices , const std::vector< std::vector< int > >& polygons,
-					 PlyProperty* properties,int propertyNum,
-					 int file_type,
-					 char** comments,const int& commentNum)
+int PlyWritePolygons( const char* fileName ,
+	const std::vector< Vertex > &vertices , const std::vector< std::vector< int > > &polygons ,
+	const PlyProperty* properties , int propertyNum ,
+	int file_type ,
+	const std::vector< std::string > &comments );
+
+template< class Vertex >
+int PlyWritePolygons( const char* fileName ,
+	const std::vector< Vertex > &vertices , const std::vector< std::vector< int > > &polygons ,
+	const PlyProperty *properties , int propertyNum ,
+	int file_type ,
+	const std::vector< std::string > &comments )
 {
 	int nr_vertices=int(vertices.size());
 	int nr_faces=int(polygons.size());
 	float version;
-	const char *elem_names[] = { "vertex" , "face" };
-	PlyFile *ply = ply_open_for_writing( fileName , 2 , elem_names , file_type , &version );
+	std::vector< std::string > elem_names = { std::string( "vertex" ) , std::string( "face" ) };
+	PlyFile *ply = PlyFile::Write( fileName , elem_names , file_type , version );
 	if (!ply){return 0;}
-	
+
 	//
 	// describe vertex and face properties
 	//
-	ply_element_count(ply, "vertex", nr_vertices);
-	for(int i=0;i<propertyNum;i++)
-		ply_describe_property(ply, "vertex", &properties[i]);
-	
-	ply_element_count(ply, "face", nr_faces);
-	ply_describe_property(ply, "face", &face_props[0]);
-	
+	ply->element_count( "vertex", nr_vertices );
+	for( int i=0 ; i<propertyNum ; i++ ) ply->describe_property( "vertex" , &properties[i] );
+	ply->element_count( "face" , nr_faces );
+	ply->describe_property( "face" , &face_props[0] );
+
 	// Write in the comments
-	if(comments && commentNum)
-		for(int i=0;i<commentNum;i++)
-			ply_put_comment(ply,comments[i]);
+	for( int i=0 ; i<comments.size() ; i++ ) ply->put_comment( comments[i] );
+	ply->header_complete();
 
-	ply_header_complete(ply);
-	
 	// write vertices
-	ply_put_element_setup(ply, "vertex");
-	for (int i=0; i < int(vertices.size()); i++)
-		ply_put_element(ply, (void *) &vertices[i]);
+	ply->put_element_setup( elem_names[0] );
+	for( int i=0 ; i<(int)vertices.size() ; i++ ) ply->put_element( (void *)&vertices[i] );
 
 	// write faces
 	PlyFace ply_face;
@@ -656,182 +330,126 @@ int PlyWritePolygons(char* fileName,
 	ply_face.nr_vertices = 3;
 	ply_face.vertices = new int[3];
 
-	ply_put_element_setup(ply, "face");
-	for (int i=0; i < nr_faces; i++)
+	ply->put_element_setup( elem_names[1] );
+	for( int i=0 ; i<nr_faces ; i++ )
 	{
-		if(int(polygons[i].size())>maxFaceVerts)
+		if( (int)polygons[i].size()>maxFaceVerts )
 		{
 			delete[] ply_face.vertices;
-			maxFaceVerts=int(polygons[i].size());
-			ply_face.vertices=new int[maxFaceVerts];
+			maxFaceVerts = (int)polygons[i].size();
+			ply_face.vertices=new int[ maxFaceVerts ];
 		}
-		ply_face.nr_vertices=int(polygons[i].size());
-		for(int j=0;j<ply_face.nr_vertices;j++)
-			ply_face.vertices[j]=polygons[i][j];
-		ply_put_element(ply, (void *) &ply_face);
+		ply_face.nr_vertices = (int)polygons[i].size();
+		for( int j=0 ; j<ply_face.nr_vertices ; j++ ) ply_face.vertices[j] = polygons[i][j];
+		ply->put_element( (void *)&ply_face );
 	}
 
 	delete[] ply_face.vertices;
-	ply_close(ply);
+	delete ply;
+
 	return 1;
 }
-template<class Vertex>
-int PlyReadPolygons(char* fileName,
-					std::vector<Vertex>& vertices , std::vector<std::vector<int> >& polygons ,
-					 PlyProperty* properties , int propertyNum ,
-					int& file_type ,
-					char*** comments , int* commentNum , bool* readFlags )
+template< class Vertex >
+int PlyReadPolygons( const char *fileName ,
+	std::vector< Vertex > &vertices , std::vector< std::vector< int > > &polygons ,
+	const PlyProperty *properties , int propertyNum ,
+	int &file_type ,
+	std::vector< std::string > &comments , bool *readFlags )
 {
-	int nr_elems;
-	char **elist;
+	std::vector< std::string > elist;
 	float version;
-	int i,j,k;
-	PlyFile* ply;
-	char* elem_name;
-	int num_elems;
-	int nr_props;
-	PlyProperty** plist;
-	PlyFace ply_face;
 
-	ply = ply_open_for_reading(fileName, &nr_elems, &elist, &file_type, &version);
+	PlyFile *ply = PlyFile::Read( fileName , elist , file_type , version );
 	if(!ply) return 0;
 
-	if(comments)
-	{
-		(*comments)=new char*[*commentNum+ply->num_comments];
-		for(int i=0;i<ply->num_comments;i++)
-			(*comments)[i]=_strdup(ply->comments[i]);
-		*commentNum=ply->num_comments;
-	}
+	comments.reserve( comments.size() + ply->comments.size() );
+	for( int i=0 ; i<ply->comments.size() ; i++ ) comments.push_back( ply->comments[i] );
 
-	for (i=0; i < nr_elems; i++) {
-		elem_name = elist[i];
-		plist = ply_get_element_description(ply, elem_name, &num_elems, &nr_props);
-		if(!plist)
+	for( int i=0 ; i<elist.size() ; i++ )
+	{
+		std::string &elem_name = elist[i];
+		int num_elems;
+		std::vector< PlyProperty * > plist = ply->get_element_description( elem_name , num_elems );
+		if( !plist.size() )
 		{
-			for(i=0;i<nr_elems;i++){
-				free(ply->elems[i]->name);
-				free(ply->elems[i]->store_prop);
-				for(j=0;j<ply->elems[i]->nprops;j++){
-					free(ply->elems[i]->props[j]->name);
-					free(ply->elems[i]->props[j]);
-				}
-				free(ply->elems[i]->props);
-			}
-			for(i=0;i<nr_elems;i++){free(ply->elems[i]);}
-			free(ply->elems);
-			for(i=0;i<ply->num_comments;i++){free(ply->comments[i]);}
-			free(ply->comments);
-			for(i=0;i<ply->num_obj_info;i++){free(ply->obj_info[i]);}
-			free(ply->obj_info);
-			ply_free_other_elements (ply->other_elems);
-			
-			for(i=0;i<nr_elems;i++){free(elist[i]);}
-			free(elist);
-			ply_close(ply);
+			delete ply;
 			return 0;
 		}		
-		if (equal_strings("vertex", elem_name))
+		if( elem_name=="vertex" )
 		{
 			for( int i=0 ; i<propertyNum ; i++)
 			{
-				int hasProperty = ply_get_property(ply,elem_name,&properties[i]);
+				int hasProperty = ply->get_property( elem_name , &properties[i] );
 				if( readFlags ) readFlags[i] = (hasProperty!=0);
 			}
-			vertices.resize(num_elems);
-			for (j=0; j < num_elems; j++)	ply_get_element (ply, (void *) &vertices[j]);
+			vertices.resize( num_elems );
+			for( int j=0 ; j<num_elems ; j++ ) ply->get_element( (void *)&vertices[j] );
 		}
-		else if (equal_strings("face", elem_name))
+		else if( elem_name=="face" )
 		{
-			ply_get_property (ply, elem_name, &face_props[0]);
-			polygons.resize(num_elems);
-			for (j=0; j < num_elems; j++)
+			ply->get_property( elem_name , &face_props[0] );
+			polygons.resize( num_elems );
+			for( int j=0 ; j<num_elems ; j++ )
 			{
-				ply_get_element (ply, (void *) &ply_face);
-				polygons[j].resize(ply_face.nr_vertices);
-				for(k=0;k<ply_face.nr_vertices;k++)	polygons[j][k]=ply_face.vertices[k];
-				delete[] ply_face.vertices;
+				PlyFace ply_face;
+				ply->get_element( (void *)&ply_face );
+				polygons[j].resize( ply_face.nr_vertices );
+				for( int k=0 ; k<ply_face.nr_vertices ; k++ ) polygons[j][k] = ply_face.vertices[k];
+				free( ply_face.vertices );
 			}  // for, read faces
 		}  // if face
-		else{ply_get_other_element (ply, elem_name, num_elems);}
+		else ply->get_other_element( elem_name , num_elems );
 
-		for(j=0;j<nr_props;j++){
-			free(plist[j]->name);
-			free(plist[j]);
-		}
-		free(plist);
+		for( int j=0 ; j<plist.size() ; j++ ) delete plist[j];
 	}  // for each type of element
-	
-	for(i=0;i<nr_elems;i++){
-		free(ply->elems[i]->name);
-		free(ply->elems[i]->store_prop);
-		for(j=0;j<ply->elems[i]->nprops;j++){
-			free(ply->elems[i]->props[j]->name);
-			free(ply->elems[i]->props[j]);
-		}
-		if(ply->elems[i]->props && ply->elems[i]->nprops){free(ply->elems[i]->props);}
-	}
-	for(i=0;i<nr_elems;i++){free(ply->elems[i]);}
-	free(ply->elems);
-	for(i=0;i<ply->num_comments;i++){free(ply->comments[i]);}
-	free(ply->comments);
-	for(i=0;i<ply->num_obj_info;i++){free(ply->obj_info[i]);}
-	free(ply->obj_info);
-	ply_free_other_elements (ply->other_elems);
-	
-	
-	for(i=0;i<nr_elems;i++){free(elist[i]);}
-	free(elist);
-	ply_close(ply);
+
+	delete ply;
 	return 1;
 }
 
-template< class Vertex , class Real >
-int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >* mesh , int file_type , const Point3D<float>& translate , float scale , char** comments , int commentNum , XForm4x4< Real > xForm )
+template< class Vertex , class Real , int Dim >
+int PlyWritePolygons( const char* fileName , CoredMeshData< Vertex >* mesh , int file_type , const Point< float , Dim >& translate , float scale , const std::vector< std::string > &comments , XForm< Real , Dim+1 > xForm )
 {
-	int i;
 	int nr_vertices=int(mesh->outOfCorePointCount()+mesh->inCorePoints.size());
 	int nr_faces=mesh->polygonCount();
 	float version;
-	const char *elem_names[] = { "vertex" , "face" };
-	PlyFile *ply = ply_open_for_writing( fileName , 2 , elem_names , file_type , &version );
+	std::vector< std::string > elem_names = { std::string( "vertex" ) , std::string( "face" ) };
+	PlyFile *ply = PlyFile::Write( fileName , elem_names , file_type , version );
 	if( !ply ) return 0;
 
 	mesh->resetIterator();
-	
+
 	//
 	// describe vertex and face properties
 	//
-	ply_element_count( ply , "vertex" , nr_vertices );
-	for( int i=0 ; i<Vertex::Components ; i++ ) ply_describe_property( ply , "vertex" , &Vertex::Properties[i] );
-	
-	ply_element_count( ply , "face" , nr_faces );
-	ply_describe_property( ply , "face" , &face_props[0] );
-	
+	ply->element_count( "vertex" , nr_vertices );
+	for( int i=0 ; i<Vertex::Components ; i++ ) ply->describe_property( "vertex" , &Vertex::Properties[i] );
+	ply->element_count( "face" , nr_faces );
+	ply->describe_property( "face" , &face_props[0] );
+
 	// Write in the comments
-	for( i=0 ; i<commentNum ; i++ ) ply_put_comment( ply , comments[i] );
+	for( int i=0 ; i<comments.size() ; i++ ) ply->put_comment( comments[i] );
+	ply->header_complete();
 
-	ply_header_complete( ply );
-	
 	// write vertices
-	ply_put_element_setup( ply , "vertex" );
-	for( i=0 ; i<int( mesh->inCorePoints.size() ) ; i++ )
+	ply->put_element_setup( "vertex" );
+	for( int i=0 ; i<int( mesh->inCorePoints.size() ) ; i++ )
 	{
 		Vertex vertex = xForm * ( mesh->inCorePoints[i] * scale + translate );
-		ply_put_element(ply, (void *) &vertex);
+		ply->put_element( (void *)&vertex );
 	}
-	for( i=0; i<mesh->outOfCorePointCount() ; i++ )
+	for( int i=0; i<mesh->outOfCorePointCount() ; i++ )
 	{
 		Vertex vertex;
 		mesh->nextOutOfCorePoint( vertex );
-		vertex = xForm * ( vertex * scale +translate );
-		ply_put_element(ply, (void *) &vertex);		
+		vertex = xForm * ( vertex * scale + translate );
+		ply->put_element( (void *)&vertex );
 	}  // for, write vertices
-	
-	// write faces
+
+	   // write faces
 	std::vector< CoredVertexIndex > polygon;
-	ply_put_element_setup( ply , "face" );
-	for( i=0 ; i<nr_faces ; i++ )
+	ply->put_element_setup( "face" );
+	for( int i=0 ; i<nr_faces ; i++ )
 	{
 		//
 		// create and fill a struct that the ply code can handle
@@ -840,62 +458,61 @@ int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >* mesh , int file_
 		mesh->nextPolygon( polygon );
 		ply_face.nr_vertices = int( polygon.size() );
 		ply_face.vertices = new int[ polygon.size() ];
-		for( int i=0 ; i<int(polygon.size()) ; i++ )
-			if( polygon[i].inCore ) ply_face.vertices[i] = polygon[i].idx;
-			else                    ply_face.vertices[i] = polygon[i].idx + int( mesh->inCorePoints.size() );
-		ply_put_element( ply, (void *) &ply_face );
-		delete[] ply_face.vertices;
+		for( int j=0 ; j<int(polygon.size()) ; j++ )
+			if( polygon[j].inCore ) ply_face.vertices[j] = polygon[j].idx;
+			else                    ply_face.vertices[j] = polygon[j].idx + int( mesh->inCorePoints.size() );
+			ply->put_element( (void *)&ply_face );
+			delete[] ply_face.vertices;
 	}  // for, write faces
-	
-	ply_close( ply );
+
+	delete ply;
+
 	return 1;
 }
-template< class Vertex , class Real >
-int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >* mesh , int file_type , char** comments , int commentNum , XForm4x4< Real > xForm )
+template< class Vertex , class Real , int Dim >
+int PlyWritePolygons( const char* fileName , CoredMeshData< Vertex >* mesh , int file_type , const std::vector< std::string > &comments , XForm< Real , Dim+1 > xForm )
 {
-	int i;
 	int nr_vertices=int(mesh->outOfCorePointCount()+mesh->inCorePoints.size());
 	int nr_faces=mesh->polygonCount();
 	float version;
-	const char *elem_names[] = { "vertex" , "face" };
-	PlyFile *ply = ply_open_for_writing( fileName , 2 , elem_names , file_type , &version );
+	std::vector< std::string > elem_names = { std::string( "vertex" ) , std::string( "face" ) };
+	PlyFile *ply = PlyFile::Write( fileName , elem_names , file_type , version );
 	if( !ply ) return 0;
 
 	mesh->resetIterator();
-	
+
 	//
 	// describe vertex and face properties
 	//
-	ply_element_count( ply , "vertex" , nr_vertices );
-	for( int i=0 ; i<Vertex::WriteComponents ; i++ ) ply_describe_property( ply , "vertex" , &Vertex::WriteProperties[i] );
-	
-	ply_element_count( ply , "face" , nr_faces );
-	ply_describe_property( ply , "face" , &face_props[0] );
-	
-	// Write in the comments
-	for( i=0 ; i<commentNum ; i++ ) ply_put_comment( ply , comments[i] );
+	ply->element_count( "vertex" , nr_vertices );
+	typename Vertex::Transform _xForm( xForm );
+	const PlyProperty* PlyWriteProperties = Vertex::PlyWriteProperties();
+	for( int i=0 ; i<Vertex::PlyWriteNum ; i++ ) ply->describe_property( "vertex" , &PlyWriteProperties[i] );
+	ply->element_count( "face" , nr_faces );
+	ply->describe_property( "face" , &face_props[0] );
 
-	ply_header_complete( ply );
-	
+	// Write in the comments
+	for( int i=0 ; i<comments.size() ; i++ ) ply->put_comment( comments[i] );
+	ply->header_complete();
 	// write vertices
-	ply_put_element_setup( ply , "vertex" );
-	for( i=0 ; i<int( mesh->inCorePoints.size() ) ; i++ )
+	ply->put_element_setup( "vertex" );
+	for( int i=0 ; i<int( mesh->inCorePoints.size() ) ; i++ )
 	{
-		Vertex vertex = xForm * mesh->inCorePoints[i];
-		ply_put_element(ply, (void *) &vertex);
+		Vertex vertex = _xForm( mesh->inCorePoints[i] );
+		ply->put_element( (void *)&vertex );
 	}
-	for( i=0; i<mesh->outOfCorePointCount() ; i++ )
+	for( int i=0; i<mesh->outOfCorePointCount() ; i++ )
 	{
 		Vertex vertex;
 		mesh->nextOutOfCorePoint( vertex );
-		vertex = xForm * ( vertex );
-		ply_put_element(ply, (void *) &vertex);		
+		vertex = _xForm( vertex );
+		ply->put_element( (void *)&vertex );
 	}  // for, write vertices
-	
-	// write faces
+
+	   // write faces
 	std::vector< CoredVertexIndex > polygon;
-	ply_put_element_setup( ply , "face" );
-	for( i=0 ; i<nr_faces ; i++ )
+	ply->put_element_setup( "face" );
+	for( int i=0 ; i<nr_faces ; i++ )
 	{
 		//
 		// create and fill a struct that the ply code can handle
@@ -904,14 +521,15 @@ int PlyWritePolygons( char* fileName , CoredMeshData< Vertex >* mesh , int file_
 		mesh->nextPolygon( polygon );
 		ply_face.nr_vertices = int( polygon.size() );
 		ply_face.vertices = new int[ polygon.size() ];
-		for( int i=0 ; i<int(polygon.size()) ; i++ )
-			if( polygon[i].inCore ) ply_face.vertices[i] = polygon[i].idx;
-			else                    ply_face.vertices[i] = polygon[i].idx + int( mesh->inCorePoints.size() );
-		ply_put_element( ply, (void *) &ply_face );
-		delete[] ply_face.vertices;
+		for( int j=0 ; j<int(polygon.size()) ; j++ )
+			if( polygon[j].inCore ) ply_face.vertices[j] = polygon[j].idx;
+			else                    ply_face.vertices[j] = polygon[j].idx + int( mesh->inCorePoints.size() );
+			ply->put_element( (void *)&ply_face );
+			delete[] ply_face.vertices;
 	}  // for, write faces
-	
-	ply_close( ply );
+
+	delete ply;
+
 	return 1;
 }
 inline int PlyDefaultFileType(void){return PLY_ASCII;}
diff --git a/Src/PlyFile.cpp b/Src/PlyFile.cpp
index 9d3f37c..2af0aa1 100644
--- a/Src/PlyFile.cpp
+++ b/Src/PlyFile.cpp
@@ -1,44 +1,45 @@
 /*
 
- The interface routines for reading and writing PLY polygon files.
- 
-  Greg Turk, February 1994
-  
-   ---------------------------------------------------------------
-   
-	A PLY file contains a single polygonal _object_.
-	
-	 An object is composed of lists of _elements_.  Typical elements are
-	 vertices, faces, edges and materials.
-	 
-	  Each type of element for a given object has one or more _properties_
-	  associated with the element type.  For instance, a vertex element may
-	  have as properties the floating-point values x,y,z and the three unsigned
-	  chars representing red, green and blue.
-	  
-	   ---------------------------------------------------------------
-	   
-		Copyright (c) 1994 The Board of Trustees of The Leland Stanford
-		Junior University.  All rights reserved.   
-		
-		 Permission to use, copy, modify and distribute this software and its   
-		 documentation for any purpose is hereby granted without fee, provided   
-		 that the above copyright notice and this permission notice appear in   
-		 all copies of this software and that you do not sell the software.   
-		 
-		  THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
-		  EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
-		  WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
-		  
+The interface routines for reading and writing PLY polygon files.
+
+Greg Turk, February 1994
+
+---------------------------------------------------------------
+
+A PLY file contains a single polygonal _object_.
+
+An object is composed of lists of _elements_.  Typical elements are
+vertices, faces, edges and materials.
+
+Each type of element for a given object has one or more _properties_
+associated with the element type.  For instance, a vertex element may
+have as properties the floating-point values x,y,z and the three unsigned
+chars representing red, green and blue.
+
+---------------------------------------------------------------
+
+Copyright (c) 1994 The Board of Trustees of The Leland Stanford
+Junior University.  All rights reserved.   
+
+Permission to use, copy, modify and distribute this software and its   
+documentation for any purpose is hereby granted without fee, provided   
+that the above copyright notice and this permission notice appear in   
+all copies of this software and that you do not sell the software.   
+
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
 */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
-#include "Ply.h"
+#include "PlyFile.h"
+#include "MyMiscellany.h"
 
-const char *type_names[] = {
+const char *type_names[] =
+{
 	"invalid",
 	"char",
 	"short",
@@ -57,10 +58,10 @@ const char *type_names[] = {
 	"uint32",     // unsigned integer          4
 	"float32",    // single-precision float    4
 	"float64",    // double-precision float    8
-
 };
 
-int ply_type_size[] = {
+int ply_type_size[] =
+{
 	0,
 	1,
 	2,
@@ -99,59 +100,35 @@ static int types_checked = 0;
 #define NAMED_PROP       1
 
 
-/* returns 1 if strings are equal, 0 if not */
-int equal_strings(const char *, const char *);
-
-/* find an element in a plyfile's list */
-PlyElement *find_element(PlyFile *, const char *);
-
-/* find a property in an element's list */
-PlyProperty *find_property(PlyElement *, const char *, int *);
-
 /* write to a file the word describing a PLY file data type */
-void write_scalar_type (FILE *, int);
+void write_scalar_type( FILE * , int );
 
 /* read a line from a file and break it up into separate words */
-char **get_words(FILE *, int *, char **);
-char **old_get_words(FILE *, int *);
-
-/* write an item to a file */
-void write_binary_item(FILE *, int, int, unsigned int, double, int);
-void write_ascii_item(FILE *, int, unsigned int, double, int);
-double old_write_ascii_item(FILE *, char *, int);
+std::vector< std::string > get_words( FILE * , char ** );
 
-/* add information to a PLY file descriptor */
-void add_element(PlyFile *, char **);
-void add_property(PlyFile *, char **);
-void add_comment(PlyFile *, char *);
-void add_obj_info(PlyFile *, char *);
+/* write to a file the word describing a PLY file data type */
+void write_scalar_type( FILE * , int );
 
-/* copy a property */
-void copy_property(PlyProperty *, PlyProperty *);
+/* write an item to a file */
+void write_binary_item( FILE * , int , int , unsigned int , double , int );
+void write_ascii_item ( FILE * ,       int , unsigned int , double , int );
 
 /* store a value into where a pointer and a type specify */
-void store_item(char *, int, int, unsigned int, double);
+void store_item( void * , int , int , unsigned int , double );
 
 /* return the value of a stored item */
-void get_stored_item( void *, int, int *, unsigned int *, double *);
+void get_stored_item( void * , int , int & , unsigned int & , double & );
 
 /* return the value stored in an item, given ptr to it and its type */
-double get_item_value(char *, int);
+double get_item_value( const void * , int );
 
 /* get binary or ascii item and store it according to ptr and type */
-void get_ascii_item(char *, int, int *, unsigned int *, double *);
-void get_binary_item(FILE *, int, int, int *, unsigned int *, double *);
-
-/* get a bunch of elements from a file */
-void ascii_get_element(PlyFile *, char *);
-void binary_get_element(PlyFile *, char *);
-
-/* memory allocation */
-char *my_alloc(int, int, const char *);
+void get_ascii_item( const std::string & , int , int & , unsigned int & , double & );
+void get_binary_item( FILE * , int       , int , int & , unsigned int & , double & );
 
 /* byte ordering */
 void get_native_binary_type();
-void swap_bytes(char *, int);
+void swap_bytes( void * , int );
 
 void check_types();
 
@@ -163,119 +140,75 @@ void check_types();
 /******************************************************************************
 Given a file pointer, get ready to write PLY data to the file.
 
- Entry:
- fp         - the given file pointer
- nelems     - number of elements in object
- elem_names - list of element names
- file_type  - file type, either ascii or binary
- 
-  Exit:
-  returns a pointer to a PlyFile, used to refer to this file, or NULL if error
+Entry:
+fp         - the given file pointer
+elem_names - list of element names; one element record is created
+             for each name, in the same order
+file_type  - file type, either ascii or binary
+
+Exit:
+returns a pointer to a PlyFile, used to refer to this file, or NULL if error
 ******************************************************************************/
 
-PlyFile *ply_write(
-				   FILE *fp,
-				   int nelems,
-				   const char **elem_names,
-				   int file_type
-				   )
+PlyFile *PlyFile::_Write( FILE *fp , const std::vector< std::string > &elem_names , int file_type )
 {
-	int i;
-	PlyFile *plyfile;
-	PlyElement *elem;
-	
 	/* check for NULL file pointer */
-	if (fp == NULL)
-		return (NULL);
-	
-	if (native_binary_type == -1)
-		get_native_binary_type();
-	if (!types_checked)
-		check_types();
-	
+	if( fp==NULL ) return NULL;
+
+	if( native_binary_type==-1 ) get_native_binary_type();
+	if( !types_checked ) check_types();
+
 	/* create a record for this object */
-	
-	plyfile = (PlyFile *) myalloc (sizeof (PlyFile));
-	if (file_type == PLY_BINARY_NATIVE)
-		plyfile->file_type = native_binary_type;
-	else
-		plyfile->file_type = file_type;
-	plyfile->num_comments = 0;
-	plyfile->num_obj_info = 0;
-	plyfile->nelems = nelems;
-	plyfile->version = 1.0;
-	plyfile->fp = fp;
-	plyfile->other_elems = NULL;
-	
+
+	PlyFile *plyfile = new PlyFile( fp );
+	if( file_type==PLY_BINARY_NATIVE ) plyfile->file_type = native_binary_type;
+	else                               plyfile->file_type = file_type;
+
 	/* tuck aside the names of the elements */
-	
-	plyfile->elems = (PlyElement **) myalloc (sizeof (PlyElement *) * nelems);
-	for (i = 0; i < nelems; i++) {
-		elem = (PlyElement *) myalloc (sizeof (PlyElement));
-		plyfile->elems[i] = elem;
-		elem->name = _strdup (elem_names[i]);
-		elem->num = 0;
-		elem->nprops = 0;
+	plyfile->elems.resize( elem_names.size() );
+	for( int i=0 ; i<elem_names.size() ; i++ )
+	{
+		plyfile->elems[i].name = elem_names[i];
+		plyfile->elems[i].num =  0;
 	}
-	
+
 	/* return pointer to the file descriptor */
-	return (plyfile);
+	return plyfile;
 }
 
 
 /******************************************************************************
 Open a polygon file for writing.
 
- Entry:
- filename   - name of file to read from
- nelems     - number of elements in object
- elem_names - list of element names
- file_type  - file type, either ascii or binary
- 
-  Exit:
-  version - version number of PLY file
-  returns a file identifier, used to refer to this file, or NULL if error
+Entry:
+filename   - name of file to write to
+             (".ply" is appended when the name does not end with it)
+elem_names - list of element names
+file_type  - file type, either ascii or binary
+
+Exit:
+version - version number of PLY file
+returns a file identifier, used to refer to this file, or NULL if error
 ******************************************************************************/
 
-PlyFile *ply_open_for_writing(
-							  char *filename,
-							  int nelems,
-							  const char **elem_names,
-							  int file_type,
-							  float *version
-							  )
+PlyFile *PlyFile::Write( const std::string &filename , const std::vector< std::string > &elem_names , int file_type , float &version )
 {
-	PlyFile *plyfile;
-	char *name;
-	FILE *fp;
-	
 	/* tack on the extension .ply, if necessary */
-	
-	name = (char *) myalloc (int(sizeof (char) * (strlen (filename)) + 5));
-	strcpy (name, filename);
-	if (strlen (name) < 4 ||
-		strcmp (name + strlen (name) - 4, ".ply") != 0)
-		strcat (name, ".ply");
-	
+	std::string name = filename;
+	if( name.length()<4 || name.substr( name.length()-4 )!=".ply" ) name += ".ply"; /* suffix comparison is case-sensitive */
+
 	/* open the file for writing */
-	
-	fp = fopen (name, "wb");
-	free(name);
-	if (fp == NULL) {
-		return (NULL);
-	}
-	
+	FILE *fp = fopen( name.c_str() , "wb" ); /* opened in binary mode whatever file_type is */
+	if( fp==NULL ) return NULL;
+
 	/* create the actual PlyFile structure */
-	
-	plyfile = ply_write (fp, nelems, elem_names, file_type);
-	if (plyfile == NULL)
-		return (NULL);
-	
+	PlyFile *plyfile = _Write( fp , elem_names , file_type ); /* not NULL-checked: _Write returns NULL only for a NULL fp, rejected above */
+
 	/* say what PLY file version number we're writing */
-	*version = plyfile->version;
-	
+	version = plyfile->version; /* output parameter: version stored in the new PlyFile */
+
 	/* return pointer to the file descriptor */
-	return (plyfile);
+	return plyfile;
 }
 
 
@@ -283,97 +216,46 @@ PlyFile *ply_open_for_writing(
 Describe an element, including its properties and how many will be written
 to the file.
 
- Entry:
- plyfile   - file identifier
- elem_name - name of element that information is being specified about
- nelems    - number of elements of this type to be written
- nprops    - number of properties contained in the element
- prop_list - list of properties
+Entry:
+elem_name - name of element that information is being specified about
+nelems    - number of elements of this type to be written
+nprops    - number of properties contained in the element
+prop_list - list of properties
 ******************************************************************************/
 
-void ply_describe_element(
-						  PlyFile *plyfile,
-						  char *elem_name,
-						  int nelems,
-						  int nprops,
-						  PlyProperty *prop_list
-						  )
+void PlyFile::describe_element( const std::string &elem_name , int nelems , int nprops , const PlyProperty *prop_list )
 {
-	int i;
-	PlyElement *elem;
-	PlyProperty *prop;
-	
 	/* look for appropriate element */
-	elem = find_element (plyfile, elem_name);
-	if (elem == NULL) {
-		fprintf(stderr,"ply_describe_element: can't find element '%s'\n",elem_name);
-		exit (-1);
-	}
-	
+	PlyElement *elem = find_element( elem_name );
+	if( elem==NULL ) ERROR_OUT( "Can't find element '%s'" , elem_name.c_str() );
+
 	elem->num = nelems;
-	
+
 	/* copy the list of properties */
-	
-	elem->nprops = nprops;
-	elem->props = (PlyProperty **) myalloc (sizeof (PlyProperty *) * nprops);
-	elem->store_prop = (char *) myalloc (sizeof (char) * nprops);
-	
-	for (i = 0; i < nprops; i++) {
-		prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-		elem->props[i] = prop;
-		elem->store_prop[i] = NAMED_PROP;
-		copy_property (prop, &prop_list[i]);
-	}
+	elem->props.resize( nprops );
+	for( int i=0 ; i<nprops ; i++ ) elem->props[i] = PlyStoredProperty( prop_list[i] , NAMED_PROP );
 }
 
 
 /******************************************************************************
 Describe a property of an element.
 
- Entry:
- plyfile   - file identifier
- elem_name - name of element that information is being specified about
- prop      - the new property
+Entry:
+elem_name - name of element that information is being specified about
+prop      - the new property
 ******************************************************************************/
 
-void ply_describe_property(
-						   PlyFile *plyfile,
-						   const char *elem_name,
-						   PlyProperty *prop
-						   )
+void PlyFile::describe_property( const std::string &elem_name , const PlyProperty *prop )
 {
-	PlyElement *elem;
-	PlyProperty *elem_prop;
-	
 	/* look for appropriate element */
-	elem = find_element (plyfile, elem_name);
-	if (elem == NULL) {
-		fprintf(stderr, "ply_describe_property: can't find element '%s'\n",
-            elem_name);
+	PlyElement *elem = find_element( elem_name );
+	if( elem == NULL )
+	{
+		WARN( "Can't find element '%s'" , elem_name.c_str() );
 		return;
 	}
-	
-	/* create room for new property */
-	
-	if (elem->nprops == 0) {
-		elem->props = (PlyProperty **) myalloc (sizeof (PlyProperty *));
-		elem->store_prop = (char *) myalloc (sizeof (char));
-		elem->nprops = 1;
-	}
-	else {
-		elem->nprops++;
-		elem->props = (PlyProperty **)
-			realloc (elem->props, sizeof (PlyProperty *) * elem->nprops);
-		elem->store_prop = (char *)
-			realloc (elem->store_prop, sizeof (char) * elem->nprops);
-	}
-	
-	/* copy the new property */
-	
-	elem_prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-	elem->props[elem->nprops - 1] = elem_prop;
-	elem->store_prop[elem->nprops - 1] = NAMED_PROP;
-	copy_property (elem_prop, prop);
+
+	elem->props.push_back( PlyStoredProperty( *prop , NAMED_PROP ) );
 }
 
 
@@ -382,53 +264,21 @@ Describe what the "other" properties are that are to be stored, and where
 they are in an element.
 ******************************************************************************/
 
-void ply_describe_other_properties(
-								   PlyFile *plyfile,
-								   PlyOtherProp *other,
-								   int offset
-								   )
+void PlyFile::describe_other_properties( const PlyOtherProp &other , int offset )
 {
-	int i;
-	PlyElement *elem;
-	PlyProperty *prop;
-	
 	/* look for appropriate element */
-	elem = find_element (plyfile, other->name);
-	if (elem == NULL) {
-		fprintf(stderr, "ply_describe_other_properties: can't find element '%s'\n",
-            other->name);
+	PlyElement *elem = find_element( other.name );
+	if( elem==NULL )
+	{
+		WARN( "Can't find element '%s'" , other.name.c_str() );
 		return;
 	}
-	
-	/* create room for other properties */
-	
-	if (elem->nprops == 0) {
-		elem->props = (PlyProperty **)
-			myalloc (sizeof (PlyProperty *) * other->nprops);
-		elem->store_prop = (char *) myalloc (sizeof (char) * other->nprops);
-		elem->nprops = 0;
-	}
-	else {
-		int newsize;
-		newsize = elem->nprops + other->nprops;
-		elem->props = (PlyProperty **)
-			realloc (elem->props, sizeof (PlyProperty *) * newsize);
-		elem->store_prop = (char *)
-			realloc (elem->store_prop, sizeof (char) * newsize);
-	}
-	
-	/* copy the other properties */
-	
-	for (i = 0; i < other->nprops; i++) {
-		prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-		copy_property (prop, other->props[i]);
-		elem->props[elem->nprops] = prop;
-		elem->store_prop[elem->nprops] = OTHER_PROP;
-		elem->nprops++;
-	}
-	
+
+	elem->props.reserve( elem->props.size() + other.props.size() );
+	for( int i=0 ; i<other.props.size() ; i++ ) elem->props.push_back( PlyStoredProperty( other.props[i] , OTHER_PROP ) );
+
 	/* save other info about other properties */
-	elem->other_size = other->size;
+	elem->other_size = other.size;
 	elem->other_offset = offset;
 }
 
@@ -436,27 +286,16 @@ void ply_describe_other_properties(
 /******************************************************************************
 State how many of a given element will be written.
 
- Entry:
- plyfile   - file identifier
- elem_name - name of element that information is being specified about
- nelems    - number of elements of this type to be written
+Entry:
+elem_name - name of element that information is being specified about
+nelems    - number of elements of this type to be written
 ******************************************************************************/
-
-void ply_element_count(
-					   PlyFile *plyfile,
-					   const char *elem_name,
-					   int nelems
-					   )
+void PlyFile::element_count( const std::string &elem_name , int nelems )
 {
-	PlyElement *elem;
-	
 	/* look for appropriate element */
-	elem = find_element (plyfile, elem_name);
-	if (elem == NULL) {
-		fprintf(stderr,"ply_element_count: can't find element '%s'\n",elem_name);
-		exit (-1);
-	}
-	
+	PlyElement *elem = find_element( elem_name );
+	if( elem==NULL ) ERROR_OUT( "Can't find element '%s'" , elem_name.c_str() );
+
 	elem->num = nelems;
 }
 
@@ -464,72 +303,50 @@ void ply_element_count(
 /******************************************************************************
 Signal that we've described everything a PLY file's header and that the
 header should be written to the file.
-
- Entry:
- plyfile - file identifier
 ******************************************************************************/
 
-void ply_header_complete(PlyFile *plyfile)
+void PlyFile::header_complete( void )
 {
-	int i,j;
-	FILE *fp = plyfile->fp;
-	PlyElement *elem;
-	PlyProperty *prop;
-	
-	fprintf (fp, "ply\n");
-	
-	switch (plyfile->file_type) {
-    case PLY_ASCII:
-		fprintf (fp, "format ascii 1.0\n");
-		break;
-    case PLY_BINARY_BE:
-		fprintf (fp, "format binary_big_endian 1.0\n");
-		break;
-    case PLY_BINARY_LE:
-		fprintf (fp, "format binary_little_endian 1.0\n");
-		break;
-    default:
-		fprintf (stderr, "ply_header_complete: bad file type = %d\n",
-			plyfile->file_type);
-		exit (-1);
+	fprintf( fp , "ply\n" );
+	switch( file_type )
+	{
+	case PLY_ASCII: fprintf( fp , "format ascii 1.0\n" )                    ; break;
+	case PLY_BINARY_BE: fprintf( fp , "format binary_big_endian 1.0\n" )    ; break;
+	case PLY_BINARY_LE: fprintf( fp , "format binary_little_endian 1.0\n" ) ; break;
+	default: ERROR_OUT( "Bad file type = %d" , file_type );
 	}
-	
+
 	/* write out the comments */
-	
-	for (i = 0; i < plyfile->num_comments; i++)
-		fprintf (fp, "comment %s\n", plyfile->comments[i]);
-	
+	for( int i=0 ; i<comments.size() ; i++ ) fprintf( fp , "comment %s\n" , comments[i].c_str() );
+
 	/* write out object information */
-	
-	for (i = 0; i < plyfile->num_obj_info; i++)
-		fprintf (fp, "obj_info %s\n", plyfile->obj_info[i]);
-	
+	for( int i=0 ; i<obj_info.size() ; i++ ) fprintf( fp , "obj_info %s\n" , obj_info[i].c_str() );
+
 	/* write out information about each element */
-	
-	for (i = 0; i < plyfile->nelems; i++) {
-		
-		elem = plyfile->elems[i];
-		fprintf (fp, "element %s %d\n", elem->name, elem->num);
-		
-		/* write out each property */
-		for (j = 0; j < elem->nprops; j++) {
-			prop = elem->props[j];
-			if (prop->is_list) {
-				fprintf (fp, "property list ");
-				write_scalar_type (fp, prop->count_external);
-				fprintf (fp, " ");
-				write_scalar_type (fp, prop->external_type);
-				fprintf (fp, " %s\n", prop->name);
+	for( int i=0 ; i<elems.size() ; i++ )
+	{
+		fprintf( fp , "element %s %d\n" , elems[i].name.c_str() , elems[i].num );
+
+		for( int j=0 ; j<elems[i].props.size() ; j++ )
+		{
+			if( elems[i].props[j].prop.is_list )
+			{
+				fprintf( fp , "property list " );
+				write_scalar_type( fp , elems[i].props[j].prop.count_external );
+				fprintf( fp , " " );
+				write_scalar_type( fp , elems[i].props[j].prop.external_type );
+				fprintf( fp , " %s\n", elems[i].props[j].prop.name.c_str() );
 			}
-			else {
-				fprintf (fp, "property ");
-				write_scalar_type (fp, prop->external_type);
-				fprintf (fp, " %s\n", prop->name);
+			else
+			{
+				fprintf( fp , "property " );
+				write_scalar_type( fp , elems[i].props[j].prop.external_type );
+				fprintf( fp , " %s\n", elems[i].props[j].prop.name.c_str() );
 			}
 		}
 	}
-	
-	fprintf (fp, "end_header\n");
+
+	fprintf( fp , "end_header\n" );
 }
 
 
@@ -537,22 +354,15 @@ void ply_header_complete(PlyFile *plyfile)
 Specify which elements are going to be written.  This should be called
 before a call to the routine ply_put_element().
 
- Entry:
- plyfile   - file identifier
- elem_name - name of element we're talking about
+Entry:
+elem_name - name of element we're talking about
 ******************************************************************************/
 
-void ply_put_element_setup(PlyFile *plyfile, const char *elem_name)
+void PlyFile::put_element_setup( const std::string &elem_name )
 {
-	PlyElement *elem;
-	
-	elem = find_element (plyfile, elem_name);
-	if (elem == NULL) {
-		fprintf(stderr, "ply_elements_setup: can't find element '%s'\n", elem_name);
-		exit (-1);
-	}
-	
-	plyfile->which_elem = elem;
+	PlyElement *elem = find_element( elem_name );
+	if( elem==NULL ) ERROR_OUT( "Can't find element '%s'" , elem_name.c_str() );
+	which_elem = elem;
 }
 
 
@@ -561,17 +371,12 @@ Write an element to the file.  This routine assumes that we're
 writing the type of element specified in the last call to the routine
 ply_put_element_setup().
 
- Entry:
- plyfile  - file identifier
- elem_ptr - pointer to the element
+Entry:
+elem_ptr - pointer to the element
 ******************************************************************************/
 
-void ply_put_element(PlyFile *plyfile, void *elem_ptr)
+void PlyFile::put_element( void *elem_ptr )
 {
-	int j,k;
-	FILE *fp = plyfile->fp;
-	PlyElement *elem;
-	PlyProperty *prop;
 	char *elem_data,*item;
 	char **item_ptr;
 	int list_count;
@@ -580,93 +385,77 @@ void ply_put_element(PlyFile *plyfile, void *elem_ptr)
 	unsigned int uint_val;
 	double double_val;
 	char **other_ptr;
-	
-	elem = plyfile->which_elem;
+
+	PlyElement *elem = which_elem;
 	elem_data = (char *)elem_ptr;
 	other_ptr = (char **) (((char *) elem_ptr) + elem->other_offset);
-	
+
 	/* write out either to an ascii or binary file */
-	
-	if (plyfile->file_type == PLY_ASCII) {
-		
-		/* write an ascii file */
-		
+
+	if( file_type==PLY_ASCII )	/* write an ascii file */
+	{
 		/* write out each property of the element */
-		for (j = 0; j < elem->nprops; j++) {
-			prop = elem->props[j];
-			if (elem->store_prop[j] == OTHER_PROP)
-				elem_data = *other_ptr;
-			else
-				elem_data = (char *)elem_ptr;
-			if (prop->is_list) {
-				item = elem_data + prop->count_offset;
-				get_stored_item ((void *) item, prop->count_internal,
-					&int_val, &uint_val, &double_val);
-				write_ascii_item (fp, int_val, uint_val, double_val,
-					prop->count_external);
+		for( int j=0 ; j<elem->props.size() ; j++ )
+		{
+			if( elem->props[j].store==OTHER_PROP ) elem_data = *other_ptr;
+			else                                   elem_data = (char *)elem_ptr;
+			if( elem->props[j].prop.is_list )
+			{
+				item = elem_data + elem->props[j].prop.count_offset;
+				get_stored_item( (void *)item , elem->props[j].prop.count_internal , int_val , uint_val , double_val );
+				write_ascii_item( fp , int_val , uint_val , double_val , elem->props[j].prop.count_external );
 				list_count = uint_val;
-				item_ptr = (char **) (elem_data + prop->offset);
+				item_ptr = (char **)( elem_data + elem->props[j].prop.offset );
 				item = item_ptr[0];
-				item_size = ply_type_size[prop->internal_type];
-				for (k = 0; k < list_count; k++) {
-					get_stored_item ((void *) item, prop->internal_type,
-						&int_val, &uint_val, &double_val);
-					write_ascii_item (fp, int_val, uint_val, double_val,
-						prop->external_type);
+				item_size = ply_type_size[ elem->props[j].prop.internal_type ];
+				for( int k=0 ; k<list_count ; k++ )
+				{
+					get_stored_item( (void *)item , elem->props[j].prop.internal_type , int_val , uint_val , double_val );
+					write_ascii_item( fp , int_val , uint_val , double_val , elem->props[j].prop.external_type );
 					item += item_size;
 				}
 			}
-			else {
-				item = elem_data + prop->offset;
-				get_stored_item ((void *) item, prop->internal_type,
-					&int_val, &uint_val, &double_val);
-				write_ascii_item (fp, int_val, uint_val, double_val,
-					prop->external_type);
+			else
+			{
+				item = elem_data + elem->props[j].prop.offset;
+				get_stored_item( (void *)item , elem->props[j].prop.internal_type , int_val , uint_val , double_val );
+				write_ascii_item( fp , int_val , uint_val , double_val , elem->props[j].prop.external_type );
 			}
 		}
-		
-		fprintf (fp, "\n");
+		fprintf( fp , "\n" );
 	}
-	else {
-		
-		/* write a binary file */
-		
+	else		/* write a binary file */
+	{
 		/* write out each property of the element */
-		for (j = 0; j < elem->nprops; j++) {
-			prop = elem->props[j];
-			if (elem->store_prop[j] == OTHER_PROP)
-				elem_data = *other_ptr;
-			else
-				elem_data = (char *)elem_ptr;
-			if (prop->is_list) {
-				item = elem_data + prop->count_offset;
-				item_size = ply_type_size[prop->count_internal];
-				get_stored_item ((void *) item, prop->count_internal,
-					&int_val, &uint_val, &double_val);
-				write_binary_item (fp, plyfile->file_type, int_val, uint_val,
-					double_val, prop->count_external);
+		for( int j=0 ; j<elem->props.size() ; j++ )
+		{
+			if (elem->props[j].store==OTHER_PROP ) elem_data = *other_ptr;
+			else                                   elem_data = (char *)elem_ptr;
+			if( elem->props[j].prop.is_list )
+			{
+				item = elem_data + elem->props[j].prop.count_offset;
+				item_size = ply_type_size[ elem->props[j].prop.count_internal ];
+				get_stored_item( (void *)item , elem->props[j].prop.count_internal , int_val , uint_val , double_val );
+				write_binary_item( fp , file_type , int_val , uint_val , double_val , elem->props[j].prop.count_external );
 				list_count = uint_val;
-				item_ptr = (char **) (elem_data + prop->offset);
+				item_ptr = (char **)( elem_data + elem->props[j].prop.offset );
 				item = item_ptr[0];
-				item_size = ply_type_size[prop->internal_type];
-				for (k = 0; k < list_count; k++) {
-					get_stored_item ((void *) item, prop->internal_type,
-						&int_val, &uint_val, &double_val);
-					write_binary_item (fp, plyfile->file_type, int_val, uint_val,
-						double_val, prop->external_type);
+				item_size = ply_type_size[ elem->props[j].prop.internal_type ];
+				for( int k=0 ; k<list_count ; k++ )
+				{
+					get_stored_item( (void *)item , elem->props[j].prop.internal_type , int_val , uint_val , double_val );
+					write_binary_item( fp , file_type , int_val , uint_val , double_val , elem->props[j].prop.external_type );
 					item += item_size;
 				}
 			}
-			else {
-				item = elem_data + prop->offset;
-				item_size = ply_type_size[prop->internal_type];
-				get_stored_item ((void *) item, prop->internal_type,
-					&int_val, &uint_val, &double_val);
-				write_binary_item (fp, plyfile->file_type, int_val, uint_val,
-					double_val, prop->external_type);
+			else
+			{
+				item = elem_data + elem->props[j].prop.offset;
+				item_size = ply_type_size[ elem->props[j].prop.internal_type ];
+				get_stored_item( (void *)item , elem->props[j].prop.internal_type , int_val , uint_val , double_val );
+				write_binary_item( fp , file_type , int_val , uint_val , double_val , elem->props[j].prop.external_type );
 			}
 		}
-		
 	}
 }
 
@@ -674,2057 +463,1387 @@ void ply_put_element(PlyFile *plyfile, void *elem_ptr)
 /******************************************************************************
 Specify a comment that will be written in the header.
 
- Entry:
- plyfile - file identifier
- comment - the comment to be written
- ******************************************************************************/
- 
- void ply_put_comment(PlyFile *plyfile, char *comment)
- {
-	 /* (re)allocate space for new comment */
-	 if (plyfile->num_comments == 0)
-		 plyfile->comments = (char **) myalloc (sizeof (char *));
-	 else
-		 plyfile->comments = (char **) realloc (plyfile->comments,
-		 sizeof (char *) * (plyfile->num_comments + 1));
-	 
-	 /* add comment to list */
-	 plyfile->comments[plyfile->num_comments] = _strdup (comment);
-	 plyfile->num_comments++;
- }
- 
- 
- /******************************************************************************
- Specify a piece of object information (arbitrary text) that will be written
- in the header.
- 
-  Entry:
-  plyfile  - file identifier
-  obj_info - the text information to be written
- ******************************************************************************/
- 
- void ply_put_obj_info(PlyFile *plyfile, char *obj_info)
- {
-	 /* (re)allocate space for new info */
-	 if (plyfile->num_obj_info == 0)
-		 plyfile->obj_info = (char **) myalloc (sizeof (char *));
-	 else
-		 plyfile->obj_info = (char **) realloc (plyfile->obj_info,
-		 sizeof (char *) * (plyfile->num_obj_info + 1));
-	 
-	 /* add info to list */
-	 plyfile->obj_info[plyfile->num_obj_info] = _strdup (obj_info);
-	 plyfile->num_obj_info++;
- }
- 
- 
- 
- 
- 
- 
- 
- /*************/
- /*  Reading  */
- /*************/
- 
- 
- 
- /******************************************************************************
- Given a file pointer, get ready to read PLY data from the file.
- 
-  Entry:
-  fp - the given file pointer
-  
-   Exit:
-   nelems     - number of elements in object
-   elem_names - list of element names
-   returns a pointer to a PlyFile, used to refer to this file, or NULL if error
- ******************************************************************************/
- 
- PlyFile *ply_read(FILE *fp, int *nelems, char ***elem_names)
- {
-	 int i,j;
-	 PlyFile *plyfile;
-	 int nwords;
-	 char **words;
-	 char **elist;
-	 PlyElement *elem;
-	 char *orig_line;
-	 /* check for NULL file pointer */
-	 if (fp == NULL)
-		 return (NULL);
-	 
-	 if (native_binary_type == -1)
-		 get_native_binary_type();
-	 if (!types_checked)
-		 check_types();
-	 /* create record for this object */
-	 
-	 plyfile = (PlyFile *) myalloc (sizeof (PlyFile));
-	 plyfile->nelems = 0;
-	 plyfile->comments = NULL;
-	 plyfile->num_comments = 0;
-	 plyfile->obj_info = NULL;
-	 plyfile->num_obj_info = 0;
-	 plyfile->fp = fp;
-	 plyfile->other_elems = NULL;
-	 
-	 /* read and parse the file's header */
-	 
-	 words = get_words (plyfile->fp, &nwords, &orig_line);
-	 if (!words || !equal_strings (words[0], "ply"))
-	 {
-		 if (words)
-			 free(words);
-		 return (NULL);
-	 }
-	 while (words) {
-		 /* parse words */
-		 
-		 if (equal_strings (words[0], "format")) {
-			 if (nwords != 3) {
-				 free(words);
-				 return (NULL);
-			 }
-			 if (equal_strings (words[1], "ascii"))
-				 plyfile->file_type = PLY_ASCII;
-			 else if (equal_strings (words[1], "binary_big_endian"))
-				 plyfile->file_type = PLY_BINARY_BE;
-			 else if (equal_strings (words[1], "binary_little_endian"))
-				 plyfile->file_type = PLY_BINARY_LE;
-			 else {
-				 free(words);
-				 return (NULL);
-			 }
-			 plyfile->version = (float)atof (words[2]);
-		 }
-		 else if (equal_strings (words[0], "element"))
-			 add_element (plyfile, words);
-		 else if (equal_strings (words[0], "property"))
-			 add_property (plyfile, words);
-		 else if (equal_strings (words[0], "comment"))
-			 add_comment (plyfile, orig_line);
-		 else if (equal_strings (words[0], "obj_info"))
-			 add_obj_info (plyfile, orig_line);
-		 else if (equal_strings (words[0], "end_header")) {
-			 free(words);
-			 break;
-		 }
-		 
-		 /* free up words space */
-		 free (words);
-		 
-		 words = get_words (plyfile->fp, &nwords, &orig_line);
-	 }
-	 
-	 /* create tags for each property of each element, to be used */
-	 /* later to say whether or not to store each property for the user */
-	 
-	 for (i = 0; i < plyfile->nelems; i++) {
-		 elem = plyfile->elems[i];
-		 elem->store_prop = (char *) myalloc (sizeof (char) * elem->nprops);
-		 for (j = 0; j < elem->nprops; j++)
-			 elem->store_prop[j] = DONT_STORE_PROP;
-		 elem->other_offset = NO_OTHER_PROPS; /* no "other" props by default */
-	 }
-	 
-	 /* set return values about the elements */
-	 
-	 elist = (char **) myalloc (sizeof (char *) * plyfile->nelems);
-	 for (i = 0; i < plyfile->nelems; i++)
-		 elist[i] = _strdup (plyfile->elems[i]->name);
-	 
-	 *elem_names = elist;
-	 *nelems = plyfile->nelems;
-	 
-	 /* return a pointer to the file's information */
-	 
-	 return (plyfile);
+Entry:
+comment - the comment to be written
+******************************************************************************/
+
+void PlyFile::put_comment( const std::string &comment ){ comments.push_back( comment ); }
+
+
+/******************************************************************************
+Specify a piece of object information (arbitrary text) that will be written
+in the header.
+
+Entry:
+obj_info - the text information to be written
+******************************************************************************/
+
+void PlyFile::put_obj_info( const std::string &obj_info ){ this->obj_info.push_back( obj_info ); }
+
+
+/*************/
+/*  Reading  */
+/*************/
+
+
+
+/******************************************************************************
+Given a file pointer, get ready to read PLY data from the file.
+
+Entry:
+fp - the given file pointer
+
+Exit:
+elem_names - list of element names, resized to hold one name per
+             element declared in the header
+returns a pointer to a PlyFile, used to refer to this file, or NULL if error
+******************************************************************************/
+
+PlyFile *PlyFile::_Read( FILE *fp , std::vector< std::string > &elem_names )
+{
+	char *orig_line; /* presumably set by get_words() to the raw header line -- confirm */
+	/* check for NULL file pointer */
+	if( fp==NULL ) return NULL;
+
+	if( native_binary_type==-1 ) get_native_binary_type();
+	if( !types_checked ) check_types();
+
+	/* create record for this object */
+	std::vector< std::string > words;
+	PlyFile *plyfile = new PlyFile( fp );
+
+	/* read and parse the file's header */
+	words = get_words( plyfile->fp , &orig_line );
+	if( !words.size() || words[0]!="ply" ) return NULL; /* NOTE(review): plyfile allocated above is not deleted on this path */
+	while( words.size() ) /* ends on an empty word list or at "end_header" */
+	{
+		/* parse words */
+		if( words[0]=="format" )
+		{
+			if( words.size()!=3 ) return NULL; /* NOTE(review): plyfile is not deleted on this path */
+			if     ( words[1]=="ascii"                ) plyfile->file_type = PLY_ASCII;
+			else if( words[1]=="binary_big_endian"    ) plyfile->file_type = PLY_BINARY_BE;
+			else if( words[1]=="binary_little_endian" ) plyfile->file_type = PLY_BINARY_LE;
+			else return NULL; /* NOTE(review): plyfile is not deleted on this path */
+			plyfile->version = (float)atof( words[2].c_str() );
+		}
+		else if( words[0]=="element"    ) plyfile->add_element ( words );
+		else if( words[0]=="property"   ) plyfile->add_property( words );
+		else if( words[0]=="comment"    ) plyfile->add_comment ( orig_line );
+		else if( words[0]=="obj_info"   ) plyfile->add_obj_info( orig_line );
+		else if( words[0]=="end_header" ) break;
+
+		words = get_words( plyfile->fp , &orig_line ); /* next header line; unrecognized keywords are skipped without a diagnostic */
+	}
+
+	/* create tags for each property of each element, to be used */
+	/* later to say whether or not to store each property for the user */
+	for( int i=0 ; i<plyfile->elems.size() ; i++ )
+	{
+		for( int j=0 ; j<plyfile->elems[i].props.size() ; j++ ) plyfile->elems[i].props[j].store = DONT_STORE_PROP;
+		plyfile->elems[i].other_offset = NO_OTHER_PROPS; /* no "other" props by default */
+	}
+
+	/* set return values about the elements */
+	elem_names.resize( plyfile->elems.size() ); /* any previous contents of elem_names are replaced */
+	for( int i=0 ; i<elem_names.size() ; i++ ) elem_names[i] = plyfile->elems[i].name;
+
+	/* return a pointer to the file's information */
+	return plyfile;
 }
 
 
 /******************************************************************************
 Open a polygon file for reading.
 
- Entry:
- filename - name of file to read from
- 
-  Exit:
-  nelems     - number of elements in object
-  elem_names - list of element names
-  file_type  - file type, either ascii or binary
-  version    - version number of PLY file
-  returns a file identifier, used to refer to this file, or NULL if error
-  ******************************************************************************/
-  
-  PlyFile *ply_open_for_reading(
-	  char *filename,
-	  int *nelems,
-	  char ***elem_names,
-	  int *file_type,
-	  float *version
-	  )
-  {
-	  FILE *fp;
-	  PlyFile *plyfile;
-	  char *name;
-	  
-	  /* tack on the extension .ply, if necessary */
-	  
-	  name = (char *) myalloc (int(sizeof (char) * (strlen (filename) + 5)));
-	  strcpy (name, filename);
-	  if (strlen (name) < 4 ||
-		  strcmp (name + strlen (name) - 4, ".ply") != 0)
-		  strcat (name, ".ply");
-	  
-	  /* open the file for reading */
-	  
-	  fp = fopen (name, "rb");
-	  free(name);
-	  if (fp == NULL)
-		  return (NULL);
-	  
-	  /* create the PlyFile data structure */
-	  
-	  plyfile = ply_read (fp, nelems, elem_names);
-	  
-	  /* determine the file type and version */
-	  
-	  *file_type = plyfile->file_type;
-	  *version = plyfile->version;
-	  
-	  /* return a pointer to the file's information */
-	  
-	  return (plyfile);
-  }
-  
-  
-  /******************************************************************************
-  Get information about a particular element.
-  
-   Entry:
-   plyfile   - file identifier
-   elem_name - name of element to get information about
-   
-	Exit:
-	nelems   - number of elements of this type in the file
-	nprops   - number of properties
-	returns a list of properties, or NULL if the file doesn't contain that elem
-  ******************************************************************************/
-  
-  PlyProperty **ply_get_element_description(
-	  PlyFile *plyfile,
-	  char *elem_name,
-	  int *nelems,
-	  int *nprops
-	  )
-  {
-	  int i;
-	  PlyElement *elem;
-	  PlyProperty *prop;
-	  PlyProperty **prop_list;
-	  
-	  /* find information about the element */
-	  elem = find_element (plyfile, elem_name);
-	  if (elem == NULL)
-		  return (NULL);
-	  
-	  *nelems = elem->num;
-	  *nprops = elem->nprops;
-	  
-	  /* make a copy of the element's property list */
-	  prop_list = (PlyProperty **) myalloc (sizeof (PlyProperty *) * elem->nprops);
-	  for (i = 0; i < elem->nprops; i++) {
-		  prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-		  copy_property (prop, elem->props[i]);
-		  prop_list[i] = prop;
-	  }
-	  
-	  /* return this duplicate property list */
-	  return (prop_list);
-  }
-  
-  
-  /******************************************************************************
-  Specify which properties of an element are to be returned.  This should be
-  called before a call to the routine ply_get_element().
-  
-   Entry:
-   plyfile   - file identifier
-   elem_name - which element we're talking about
-   nprops    - number of properties
-   prop_list - list of properties
-  ******************************************************************************/
-  
-  void ply_get_element_setup(
-	  PlyFile *plyfile,
-	  char *elem_name,
-	  int nprops,
-	  PlyProperty *prop_list
-	  )
-  {
-	  int i;
-	  PlyElement *elem;
-	  PlyProperty *prop;
-	  int index;
-	  
-	  /* find information about the element */
-	  elem = find_element (plyfile, elem_name);
-	  plyfile->which_elem = elem;
-	  
-	  /* deposit the property information into the element's description */
-	  for (i = 0; i < nprops; i++) {
-		  
-		  /* look for actual property */
-		  prop = find_property (elem, prop_list[i].name, &index);
-		  if (prop == NULL) {
-			  fprintf (stderr, "Warning:  Can't find property '%s' in element '%s'\n",
-				  prop_list[i].name, elem_name);
-			  continue;
-		  }
-		  
-		  /* store its description */
-		  prop->internal_type = prop_list[i].internal_type;
-		  prop->offset = prop_list[i].offset;
-		  prop->count_internal = prop_list[i].count_internal;
-		  prop->count_offset = prop_list[i].count_offset;
-		  
-		  /* specify that the user wants this property */
-		  elem->store_prop[index] = STORE_PROP;
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Specify a property of an element that is to be returned.  This should be
-  called (usually multiple times) before a call to the routine ply_get_element().
-  This routine should be used in preference to the less flexible old routine
-  called ply_get_element_setup().
-  
-   Entry:
-   plyfile   - file identifier
-   elem_name - which element we're talking about
-   prop      - property to add to those that will be returned
-  ******************************************************************************/
-  
-  int ply_get_property(
-	  PlyFile *plyfile,
-	  char *elem_name,
-	  PlyProperty *prop
-	  )
-  {
-	  PlyElement *elem;
-	  PlyProperty *prop_ptr;
-	  int index;
-	  
-	  /* find information about the element */
-	  elem = find_element (plyfile, elem_name);
-	  plyfile->which_elem = elem;
-	  
-	  /* deposit the property information into the element's description */
-	  
-	  prop_ptr = find_property (elem, prop->name, &index);
-	  if (prop_ptr == NULL) {
-//		  fprintf (stderr, "Warning:  Can't find property '%s' in element '%s'\n",
-//			  prop->name, elem_name);
-//		  return;
-		  return 0;
-	  }
-	  prop_ptr->internal_type  = prop->internal_type;
-	  prop_ptr->offset         = prop->offset;
-	  prop_ptr->count_internal = prop->count_internal;
-	  prop_ptr->count_offset   = prop->count_offset;
-	  
-	  /* specify that the user wants this property */
-	  elem->store_prop[index] = STORE_PROP;
-	  return 1;
-  }
-  
-  
-  /******************************************************************************
-  Read one element from the file.  This routine assumes that we're reading
-  the type of element specified in the last call to the routine
-  ply_get_element_setup().
-  
-   Entry:
-   plyfile  - file identifier
-   elem_ptr - pointer to location where the element information should be put
-  ******************************************************************************/
-  
-  void ply_get_element(PlyFile *plyfile, void *elem_ptr)
-  {
-	  if (plyfile->file_type == PLY_ASCII)
-		  ascii_get_element (plyfile, (char *) elem_ptr);
-	  else
-		  binary_get_element (plyfile, (char *) elem_ptr);
-  }
-  
-  
-  /******************************************************************************
-  Extract the comments from the header information of a PLY file.
-  
-   Entry:
-   plyfile - file identifier
-   
-	Exit:
-	num_comments - number of comments returned
-	returns a pointer to a list of comments
-  ******************************************************************************/
-  
-  char **ply_get_comments(PlyFile *plyfile, int *num_comments)
-  {
-	  *num_comments = plyfile->num_comments;
-	  return (plyfile->comments);
-  }
-  
-  
-  /******************************************************************************
-  Extract the object information (arbitrary text) from the header information
-  of a PLY file.
-  
-   Entry:
-   plyfile - file identifier
-   
-	Exit:
-	num_obj_info - number of lines of text information returned
-	returns a pointer to a list of object info lines
-  ******************************************************************************/
-  
-  char **ply_get_obj_info(PlyFile *plyfile, int *num_obj_info)
-  {
-	  *num_obj_info = plyfile->num_obj_info;
-	  return (plyfile->obj_info);
-  }
-  
-  
-  /******************************************************************************
-  Make ready for "other" properties of an element-- those properties that
-  the user has not explicitly asked for, but that are to be stashed away
-  in a special structure to be carried along with the element's other
-  information.
-  
-   Entry:
-   plyfile - file identifier
-   elem    - element for which we want to save away other properties
-  ******************************************************************************/
-  
-  void setup_other_props(PlyElement *elem)
-  {
-	  int i;
-	  PlyProperty *prop;
-	  int size = 0;
-	  int type_size;
-	  
-	  /* Examine each property in decreasing order of size. */
-	  /* We do this so that all data types will be aligned by */
-	  /* word, half-word, or whatever within the structure. */
-	  
-	  for (type_size = 8; type_size > 0; type_size /= 2) {
-		  
-		  /* add up the space taken by each property, and save this information */
-		  /* away in the property descriptor */
-		  
-		  for (i = 0; i < elem->nprops; i++) {
-			  
-			  /* don't bother with properties we've been asked to store explicitly */
-			  if (elem->store_prop[i])
-				  continue;
-			  
-			  prop = elem->props[i];
-			  
-			  /* internal types will be same as external */
-			  prop->internal_type = prop->external_type;
-			  prop->count_internal = prop->count_external;
-			  
-			  /* check list case */
-			  if (prop->is_list) {
-				  
-				  /* pointer to list */
-				  if (type_size == sizeof (void *)) {
-					  prop->offset = size;
-					  size += sizeof (void *);    /* always use size of a pointer here */
-				  }
-				  
-				  /* count of number of list elements */
-				  if (type_size == ply_type_size[prop->count_external]) {
-					  prop->count_offset = size;
-					  size += ply_type_size[prop->count_external];
-				  }
-			  }
-			  /* not list */
-			  else if (type_size == ply_type_size[prop->external_type]) {
-				  prop->offset = size;
-				  size += ply_type_size[prop->external_type];
-			  }
-		  }
-		  
-	  }
-	  
-	  /* save the size for the other_props structure */
-	  elem->other_size = size;
-  }
-  
-  
-  /******************************************************************************
-  Specify that we want the "other" properties of an element to be tucked
-  away within the user's structure.  The user needn't be concerned for how
-  these properties are stored.
-  
-   Entry:
-   plyfile   - file identifier
-   elem_name - name of element that we want to store other_props in
-   offset    - offset to where other_props will be stored inside user's structure
-   
-	Exit:
-	returns pointer to structure containing description of other_props
-  ******************************************************************************/
-  
-  PlyOtherProp *ply_get_other_properties(
-	  PlyFile *plyfile,
-	  char *elem_name,
-	  int offset
-	  )
-  {
-	  int i;
-	  PlyElement *elem;
-	  PlyOtherProp *other;
-	  PlyProperty *prop;
-	  int nprops;
-	  
-	  /* find information about the element */
-	  elem = find_element (plyfile, elem_name);
-	  if (elem == NULL) {
-		  fprintf (stderr, "ply_get_other_properties: Can't find element '%s'\n",
-			  elem_name);
-		  return (NULL);
-	  }
-	  
-	  /* remember that this is the "current" element */
-	  plyfile->which_elem = elem;
-	  
-	  /* save the offset to where to store the other_props */
-	  elem->other_offset = offset;
-	  
-	  /* place the appropriate pointers, etc. in the element's property list */
-	  setup_other_props (elem);
-	  
-	  /* create structure for describing other_props */
-	  other = (PlyOtherProp *) myalloc (sizeof (PlyOtherProp));
-	  other->name = _strdup (elem_name);
-	  other->size = elem->other_size;
-	  other->props = (PlyProperty **) myalloc (sizeof(PlyProperty) * elem->nprops);
-	  
-	  /* save descriptions of each "other" property */
-	  nprops = 0;
-	  for (i = 0; i < elem->nprops; i++) {
-		  if (elem->store_prop[i])
-			  continue;
-		  prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-		  copy_property (prop, elem->props[i]);
-		  other->props[nprops] = prop;
-		  nprops++;
-	  }
-	  other->nprops = nprops;
-	  
-	  /* set other_offset pointer appropriately if there are NO other properties */
-	  if (other->nprops == 0) {
-		  elem->other_offset = NO_OTHER_PROPS;
-	  }
-	  
-	  /* return structure */
-	  return (other);
-  }
-  
-  
-  
-  
-  /*************************/
-  /*  Other Element Stuff  */
-  /*************************/
-  
-  
-  
-  
-  /******************************************************************************
-  Grab all the data for an element that a user does not want to explicitly
-  read in.
-  
-   Entry:
-   plyfile    - pointer to file
-   elem_name  - name of element whose data is to be read in
-   elem_count - number of instances of this element stored in the file
-   
-	Exit:
-	returns pointer to ALL the "other" element data for this PLY file
-  ******************************************************************************/
-  
-  PlyOtherElems *ply_get_other_element (
-	  PlyFile *plyfile,
-	  char *elem_name,
-	  int elem_count
-	  )
-  {
-	  int i;
-	  PlyElement *elem;
-	  PlyOtherElems *other_elems;
-	  OtherElem *other;
-	  
-	  /* look for appropriate element */
-	  elem = find_element (plyfile, elem_name);
-	  if (elem == NULL) {
-		  fprintf (stderr,
-			  "ply_get_other_element: can't find element '%s'\n", elem_name);
-		  exit (-1);
-	  }
-	  
-	  /* create room for the new "other" element, initializing the */
-	  /* other data structure if necessary */
-	  
-	  if (plyfile->other_elems == NULL) {
-		  plyfile->other_elems = (PlyOtherElems *) myalloc (sizeof (PlyOtherElems));
-		  other_elems = plyfile->other_elems;
-		  other_elems->other_list = (OtherElem *) myalloc (sizeof (OtherElem));
-		  other = &(other_elems->other_list[0]);
-		  other_elems->num_elems = 1;
-	  }
-	  else {
-		  other_elems = plyfile->other_elems;
-		  other_elems->other_list = (OtherElem *) realloc (other_elems->other_list,
-			  sizeof (OtherElem) * other_elems->num_elems + 1);
-		  other = &(other_elems->other_list[other_elems->num_elems]);
-		  other_elems->num_elems++;
-	  }
-	  
-	  /* count of element instances in file */
-	  other->elem_count = elem_count;
-	  
-	  /* save name of element */
-	  other->elem_name = _strdup (elem_name);
-	  
-	  /* create a list to hold all the current elements */
-	  other->other_data = (OtherData **)
-		  malloc (sizeof (OtherData *) * other->elem_count);
-	  
-	  /* set up for getting elements */
-	  other->other_props = ply_get_other_properties (plyfile, elem_name,
-		  offsetof(OtherData,other_props));
-	  
-	  /* grab all these elements */
-	  for (i = 0; i < other->elem_count; i++) {
-		  /* grab and element from the file */
-		  other->other_data[i] = (OtherData *) malloc (sizeof (OtherData));
-		  ply_get_element (plyfile, (void *) other->other_data[i]);
-	  }
-	  
-	  /* return pointer to the other elements data */
-	  return (other_elems);
-  }
-  
-  
-  /******************************************************************************
-  Pass along a pointer to "other" elements that we want to save in a given
-  PLY file.  These other elements were presumably read from another PLY file.
-  
-   Entry:
-   plyfile     - file pointer in which to store this other element info
-   other_elems - info about other elements that we want to store
-  ******************************************************************************/
-  
-  void ply_describe_other_elements (
-	  PlyFile *plyfile,
-	  PlyOtherElems *other_elems
-	  )
-  {
-	  int i;
-	  OtherElem *other;
-	  PlyElement *elem;
-	  
-	  /* ignore this call if there is no other element */
-	  if (other_elems == NULL)
-		  return;
-	  
-	  /* save pointer to this information */
-	  plyfile->other_elems = other_elems;
-	  
-	  /* describe the other properties of this element */
-	  /* store them in the main element list as elements with
-	  only other properties */
-	  
-	  REALLOCN(plyfile->elems, PlyElement *,
-		  plyfile->nelems, plyfile->nelems + other_elems->num_elems);
-	  for (i = 0; i < other_elems->num_elems; i++) {
-		  other = &(other_elems->other_list[i]);
-		  elem = (PlyElement *) myalloc (sizeof (PlyElement));
-		  plyfile->elems[plyfile->nelems++] = elem;
-		  elem->name = _strdup (other->elem_name);
-		  elem->num = other->elem_count;
-		  elem->nprops = 0;
-		  ply_describe_other_properties (plyfile, other->other_props,
-			  offsetof(OtherData,other_props));
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Write out the "other" elements specified for this PLY file.
-  
-   Entry:
-   plyfile - pointer to PLY file to write out other elements for
-  ******************************************************************************/
-  
-  void ply_put_other_elements (PlyFile *plyfile)
-  {
-	  int i,j;
-	  OtherElem *other;
-	  
-	  /* make sure we have other elements to write */
-	  if (plyfile->other_elems == NULL)
-		  return;
-	  
-	  /* write out the data for each "other" element */
-	  
-	  for (i = 0; i < plyfile->other_elems->num_elems; i++) {
-		  
-		  other = &(plyfile->other_elems->other_list[i]);
-		  ply_put_element_setup (plyfile, other->elem_name);
-		  
-		  /* write out each instance of the current element */
-		  for (j = 0; j < other->elem_count; j++)
-			  ply_put_element (plyfile, (void *) other->other_data[j]);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Free up storage used by an "other" elements data structure.
-  
-   Entry:
-   other_elems - data structure to free up
-  ******************************************************************************/
-  
-  void ply_free_other_elements (PlyOtherElems *other_elems)
-  {
-	  other_elems = other_elems;
-  }
-  
-  
-  
-  /*******************/
-  /*  Miscellaneous  */
-  /*******************/
-  
-  
-  
-  /******************************************************************************
-  Close a PLY file.
-  
-   Entry:
-   plyfile - identifier of file to close
-  ******************************************************************************/
-  
-  void ply_close(PlyFile *plyfile)
-  {
-	  fclose (plyfile->fp);
-	  
-	  /* free up memory associated with the PLY file */
-	  free (plyfile);
-  }
-  
-  
-  /******************************************************************************
-  Get version number and file type of a PlyFile.
-  
-   Entry:
-   ply - pointer to PLY file
-   
-	Exit:
-	version - version of the file
-	file_type - PLY_ASCII, PLY_BINARY_BE, or PLY_BINARY_LE
-  ******************************************************************************/
-  
-  void ply_get_info(PlyFile *ply, float *version, int *file_type)
-  {
-	  if (ply == NULL)
-		  return;
-	  
-	  *version = ply->version;
-	  *file_type = ply->file_type;
-  }
-  
-  
-  /******************************************************************************
-  Compare two strings.  Returns 1 if they are the same, 0 if not.
-  ******************************************************************************/
-  
-  int equal_strings(const char *s1, const char *s2)
-  {
-	  
-	  while (*s1 && *s2)
-		  if (*s1++ != *s2++)
-			  return (0);
-		  
-		  if (*s1 != *s2)
-			  return (0);
-		  else
-			  return (1);
-  }
-  
-  
-  /******************************************************************************
-  Find an element from the element list of a given PLY object.
-  
-   Entry:
-   plyfile - file id for PLY file
-   element - name of element we're looking for
-   
-	Exit:
-	returns the element, or NULL if not found
-  ******************************************************************************/
-  
-  PlyElement *find_element(PlyFile *plyfile, const char *element)
-  {
-	  int i;
-	  
-	  for (i = 0; i < plyfile->nelems; i++)
-		  if (equal_strings (element, plyfile->elems[i]->name))
-			  return (plyfile->elems[i]);
-		  
-		  return (NULL);
-  }
-  
-  
-  /******************************************************************************
-  Find a property in the list of properties of a given element.
-  
-   Entry:
-   elem      - pointer to element in which we want to find the property
-   prop_name - name of property to find
-   
-	Exit:
-	index - index to position in list
-	returns a pointer to the property, or NULL if not found
-  ******************************************************************************/
-  
-  PlyProperty *find_property(PlyElement *elem, const char *prop_name, int *index)
-  {
-	  int i;
-	  
-	  for (i = 0; i < elem->nprops; i++)
-		  if (equal_strings (prop_name, elem->props[i]->name)) {
-			  *index = i;
-			  return (elem->props[i]);
-		  }
-		  
-		  *index = -1;
-		  return (NULL);
-  }
-  
-  
-  /******************************************************************************
-  Read an element from an ascii file.
-  
-   Entry:
-   plyfile  - file identifier
-   elem_ptr - pointer to element
-  ******************************************************************************/
-  
-  void ascii_get_element(PlyFile *plyfile, char *elem_ptr)
-  {
-	  int j,k;
-	  PlyElement *elem;
-	  PlyProperty *prop;
-	  char **words;
-	  int nwords;
-	  int which_word;
-	  char *elem_data,*item=NULL;
-	  char *item_ptr;
-	  int item_size;
-	  int int_val;
-	  unsigned int uint_val;
-	  double double_val;
-	  int list_count;
-	  int store_it;
-	  char **store_array;
-	  char *orig_line;
-	  char *other_data=NULL;
-	  int other_flag;
-	  
-	  /* the kind of element we're reading currently */
-	  elem = plyfile->which_elem;
-	  
-	  /* do we need to setup for other_props? */
-	  
-	  if (elem->other_offset != NO_OTHER_PROPS) {
-		  char **ptr;
-		  other_flag = 1;
-		  /* make room for other_props */
-		  other_data = (char *) myalloc (elem->other_size);
-		  /* store pointer in user's structure to the other_props */
-		  ptr = (char **) (elem_ptr + elem->other_offset);
-		  *ptr = other_data;
-	  }
-	  else
-		  other_flag = 0;
-	  
-	  /* read in the element */
-	  
-	  words = get_words (plyfile->fp, &nwords, &orig_line);
-	  if (words == NULL) {
-		  fprintf (stderr, "ply_get_element: unexpected end of file\n");
-		  exit (-1);
-	  }
-	  
-	  which_word = 0;
-	  
-	  for (j = 0; j < elem->nprops; j++) {
-		  
-		  prop = elem->props[j];
-		  store_it = (elem->store_prop[j] | other_flag);
-		  
-		  /* store either in the user's structure or in other_props */
-		  if (elem->store_prop[j])
-			  elem_data = elem_ptr;
-		  else
-			  elem_data = other_data;
-		  
-		  if (prop->is_list) {       /* a list */
-			  
-			  /* get and store the number of items in the list */
-			  get_ascii_item (words[which_word++], prop->count_external,
-				  &int_val, &uint_val, &double_val);
-			  if (store_it) {
-				  item = elem_data + prop->count_offset;
-				  store_item(item, prop->count_internal, int_val, uint_val, double_val);
-			  }
-			  
-			  /* allocate space for an array of items and store a ptr to the array */
-			  list_count = int_val;
-			  item_size = ply_type_size[prop->internal_type];
-			  store_array = (char **) (elem_data + prop->offset);
-			  
-			  if (list_count == 0) {
-				  if (store_it)
-					  *store_array = NULL;
-			  }
-			  else {
-				  if (store_it) {
-					  item_ptr = (char *) myalloc (sizeof (char) * item_size * list_count);
-					  item = item_ptr;
-					  *store_array = item_ptr;
-				  }
-				  
-				  /* read items and store them into the array */
-				  for (k = 0; k < list_count; k++) {
-					  get_ascii_item (words[which_word++], prop->external_type,
-                          &int_val, &uint_val, &double_val);
-					  if (store_it) {
-						  store_item (item, prop->internal_type,
-							  int_val, uint_val, double_val);
-						  item += item_size;
-					  }
-				  }
-			  }
-			  
-		  }
-		  else {                     /* not a list */
-			  get_ascii_item (words[which_word++], prop->external_type,
-				  &int_val, &uint_val, &double_val);
-			  if (store_it) {
-				  item = elem_data + prop->offset;
-				  store_item (item, prop->internal_type, int_val, uint_val, double_val);
-			  }
-		  }
-		  
-	  }
-	  
-	  free (words);
+Entry:
+filename - name of file to read from; ".ply" is appended when it is missing
+
+Exit:
+elem_names - list of element names
+file_type  - file type taken from the header's format line: PLY_ASCII,
+             PLY_BINARY_BE, or PLY_BINARY_LE
+version    - version number of PLY file
+returns a file identifier, used to refer to this file, or NULL if error
+******************************************************************************/
+
+PlyFile *PlyFile::Read( const std::string &filename , std::vector< std::string > &elem_names , int &file_type , float &version )
+{
+	/* tack on the extension .ply, if necessary */
+	std::string name = filename;
+	if( name.length()<4 || name.substr( name.length()-4 )!=".ply" ) name += ".ply";
+
+	/* open the file for reading */
+	FILE *fp = fopen( name.c_str() , "rb" );
+	if( fp==NULL ) return NULL;
+
+	/* create the PlyFile data structure; _Read yields NULL when it rejects the header, so it must not be dereferenced blindly */
+	PlyFile *plyfile = _Read( fp , elem_names );
+	if( plyfile==NULL ) return NULL; /* fp was handed to _Read, so it is not closed here -- NOTE(review): confirm who closes it on failure */
+	/* determine the file type and version */
+	file_type = plyfile->file_type;
+	version = plyfile->version;
+
+	/* return a pointer to the file's information */
+	return plyfile;
 }
 
 
+/******************************************************************************
+Get information about a particular element.
+
+Entry:
+elem_name - name of element to get information about
+
+Exit:
+nelems   - number of elements of this type in the file (untouched if not found)
+returns a list of newly allocated copies of the element's properties; the
+list is empty if the file doesn't contain that elem
+******************************************************************************/
+
+std::vector< PlyProperty * > PlyFile::get_element_description( const std::string &elem_name , int &nelems )
+{
+	std::vector< PlyProperty * > copies;
+
+	/* an unknown element yields the empty list and leaves nelems alone */
+	PlyElement *elem = find_element( elem_name );
+	if( !elem ) return copies;
+	nelems = elem->num;
+
+	/* duplicate each property descriptor of the element on the heap */
+	copies.reserve( elem->props.size() );
+	for( size_t p=0 ; p<elem->props.size() ; p++ ) copies.push_back( new PlyProperty( elem->props[p].prop ) );
+
+	/* hand the duplicates back to the caller */
+	return copies;
+}
+
+/******************************************************************************
+Specify which properties of an element are to be returned.  This should be
+called before a call to the routine get_element().
+
+Entry:
+elem_name - which element we're talking about (warns and returns if unknown)
+nprops    - number of properties
+prop_list - list of properties
+******************************************************************************/
+
+void PlyFile::get_element_setup( const std::string &elem_name , int nprops , PlyProperty *prop_list )
+{
+	/* find information about the element; find_element gives NULL for an unknown name */
+	PlyElement *elem = find_element( elem_name );
+	which_elem = elem;
+	if( elem==NULL ){ WARN( "Can't find element '%s'" , elem_name.c_str() ) ; return; }
+	/* deposit the property information into the element's description */
+	for( int i=0 ; i<nprops ; i++ )
+	{
+		/* look for actual property */
+		int index;
+		PlyProperty *prop = elem->find_property( prop_list[i].name , index );
+		if( prop==NULL )
+		{
+			WARN( "Can't find property '%s' in element '%s'" , prop_list[i].name.c_str() , elem_name.c_str() );
+			continue;
+		}
+
+		/* store its description */
+		prop->internal_type = prop_list[i].internal_type;
+		prop->offset = prop_list[i].offset;
+		prop->count_internal = prop_list[i].count_internal;
+		prop->count_offset = prop_list[i].count_offset;
+
+		/* specify that the user wants this property */
+		elem->props[index].store = STORE_PROP;
+	}
+}
+
+
+/******************************************************************************
+Specify a property of an element that is to be returned.  This should be
+called (usually multiple times) before a call to the routine get_element(),
+and is preferred over the less flexible routine get_element_setup().
+Returns 1 if the property was registered, 0 if the element or property is unknown.
+
+Entry:
+elem_name - which element we're talking about
+prop      - property to add to those that will be returned
+******************************************************************************/
+
+int PlyFile::get_property( const std::string &elem_name , const PlyProperty *prop )
+{
+	/* find information about the element; find_element gives NULL for an unknown name */
+	PlyElement *elem = find_element( elem_name );
+	which_elem = elem;
+	if( elem==NULL ) return 0;
+	/* deposit the property information into the element's description */
+	int index;
+	PlyProperty *prop_ptr = elem->find_property( prop->name , index );
+	if( prop_ptr==NULL ) return 0;
+	prop_ptr->internal_type  = prop->internal_type;
+	prop_ptr->offset         = prop->offset;
+	prop_ptr->count_internal = prop->count_internal;
+	prop_ptr->count_offset   = prop->count_offset;
+
+	/* specify that the user wants this property */
+	elem->props[index].store = STORE_PROP;
+
+	return 1;
+}
+
+
+/******************************************************************************
+Read one element from the file, dispatching on the file type: ASCII files
+use the text reader, every other file type uses the binary reader.  The
+element type is expected to have been chosen by get_element_setup().
+
+Entry:
+elem_ptr - pointer to location where the element information should be put
+******************************************************************************/
+
+void PlyFile::get_element( void *elem_ptr )
+{
+	if( file_type!=PLY_ASCII ) _binary_get_element( elem_ptr );
+	else                       _ascii_get_element( elem_ptr );
+}
+
+/******************************************************************************
+Extract the comments from the header information of a PLY file.
+
+Exit:
+returns a reference to this object's list of comment lines; the size of the
+list is the number of comments
+******************************************************************************/
+
+std::vector< std::string > &PlyFile::get_comments( void ){ return this->comments; }
+
+/******************************************************************************
+Extract the object information (arbitrary text) from the header information
+of a PLY file.
+
+Exit:
+returns a reference to this object's list of object info lines; the size of
+the list is the number of lines
+******************************************************************************/
+std::vector< std::string > &PlyFile::get_obj_info( void ){ return this->obj_info; }
+
+/******************************************************************************
+Lay out the "other" properties of an element -- the ones the user has not
+explicitly asked for -- inside a single structure.  Each such property gets
+its offset (and, for lists, its count offset) assigned, and the total size
+is stored in elem->other_size.
+
+Entry:
+elem    - element whose unrequested properties are to be laid out
+******************************************************************************/
+
+void setup_other_props( PlyElement *elem )
+{
+	int total = 0;
+
+	/* Visit the properties once per field width, widest first (8, 4, 2, 1). */
+	/* We do this so that all data types will be aligned by */
+	/* word, half-word, or whatever within the structure. */
+
+	for( int width=8 ; width>=1 ; width>>=1 )
+	{
+		/* claim space for every field of exactly this width, recording */
+		/* where it landed in the property descriptor */
+		for( int p=0 ; p<elem->props.size() ; p++ )
+		{
+			/* explicitly requested properties are skipped here */
+			if( elem->props[p].store ) continue;
+			PlyProperty &prop = elem->props[p].prop;
+
+			/* internal representation mirrors the external one */
+			prop.count_internal = prop.count_external;
+			prop.internal_type = prop.external_type;
+
+			/* scalar: a single field sized by its external type */
+			if( !prop.is_list )
+			{
+				if( width==ply_type_size[ prop.external_type ] )
+				{
+					prop.offset = total;
+					total += ply_type_size[ prop.external_type ];
+				}
+				continue;
+			}
+
+			/* list: the pointer to the items (always the size of a pointer) ... */
+			if( width==sizeof( void * ) )
+			{
+				prop.offset = total;
+				total += sizeof( void * );
+			}
+
+			/* ... and the count of list elements, sized by its external count type */
+			if( width==ply_type_size[ prop.count_external ] )
+			{
+				prop.count_offset = total;
+				total += ply_type_size[ prop.count_external ];
+			}
+		}
+	}
+
+	/* save the size for the other_props structure */
+	elem->other_size = total;
+}
+
+
+/******************************************************************************
+Specify that we want the "other" properties of an element to be tucked
+away within the user's structure.  The user needn't be concerned for how
+these properties are stored.
+
+Entry:
+elem_name - name of element that we want to store other_props in
+offset    - offset to where other_props will be stored inside user's structure
+
+Exit:
+other - receives the size and the list of the element's other properties
+returns false (after a warning) if the element can't be found, true otherwise
+******************************************************************************/
+
+bool PlyFile::set_other_properties( const std::string &elem_name , int offset , PlyOtherProp &other )
+{
+	/* look the element up by name; nothing is modified if it is unknown */
+	PlyElement *target = find_element( elem_name );
+	if( target==NULL )
+	{
+		WARN( "Can't find element '%s'" , elem_name.c_str() );
+		return false;
+	}
+
+	/* this becomes the "current" element; remember where its other_props go */
+	which_elem = target;
+	target->other_offset = offset;
+
+	/* assign offsets within the other_props structure and compute its size */
+	setup_other_props( target );
+
+	/* describe the other_props: total size, plus each property not stored explicitly */
+	other.size = target->other_size;
+	other.props.reserve( target->props.size() );
+	for( int p=0 ; p<target->props.size() ; p++ )
+		if( !target->props[p].store ) other.props.push_back( target->props[p].prop );
+
+	/* flag the element as having NO other properties when the list is empty */
+	if( other.props.empty() ) target->other_offset = NO_OTHER_PROPS;
+	return true;
+}
+
+/*************************/
+/*  Other Element Stuff  */
+/*************************/
+
+
+
+
+/******************************************************************************
+Grab all the data for an element that a user does not want to explicitly
+read in.
+
+Entry:
+elem_name  - name of element whose data is to be read in
+elem_count - number of instances of this element stored in the file
+
+Exit:
+returns pointer to ALL the "other" element data for this PLY file
+******************************************************************************/
+
+PlyOtherElems *PlyFile::get_other_element( std::string &elem_name , int elem_count )
+{
+	/* the element must be one that this file knows about */
+	PlyElement *elem = find_element( elem_name );
+	if( elem==NULL ) ERROR_OUT( "Can't find element '%s'" , elem_name.c_str() ) , exit(-1);
+
+	/* create the container for "other" elements the first time one is requested */
+	if( other_elems==NULL ) other_elems = new PlyOtherElems();
+
+	/* append a fresh entry for this element */
+	other_elems->other_list.resize( other_elems->other_list.size()+1 );
+	OtherElem &entry = other_elems->other_list.back();
+
+	/* save name of element */
+	entry.elem_name = elem_name;
+
+	/* make room for elem_count instances of the element */
+	entry.other_data.resize( elem_count );
+
+	/* set up the properties that are not stored explicitly as other_props */
+	set_other_properties( elem_name , offsetof( OtherData , other_props ) , entry.other_props );
+
+	/* grab each instance of the element from the file */
+	for( int n=0 ; n<entry.other_data.size() ; n++ )
+		get_element( (void *)&entry.other_data[n] );
+
+	/* hand back ALL the "other" element data gathered so far, not just this entry */
+	return other_elems;
+}
+
+
+/******************************************************************************
+Pass along a pointer to "other" elements that we want to save in a given
+PLY file.  These other elements were presumably read from another PLY file.
+
+Entry:
+other_elems - info about other elements that we want to store
+******************************************************************************/
+
+void PlyFile::describe_other_elements( PlyOtherElems *other_elems )
+{
+	/* ignore this call if there is no other element */
+	if( other_elems==NULL ) return;
+
+	/* save pointer to this information */
+	this->other_elems = other_elems;
+
+	/* Register each "other" element in the main element list as an element */
+	/* with only other properties.  The element is appended BEFORE the call to */
+	/* describe_other_properties(): that call is not handed the local copy, so */
+	/* it can only ever affect elements that are already in the list. */
+	elems.reserve( elems.size() + other_elems->other_list.size() );
+	for( int i=0 ; i<other_elems->other_list.size() ; i++ )
+	{
+		PlyElement elem;
+		elem.name = other_elems->other_list[i].elem_name;
+		elem.num = (int)other_elems->other_list[i].other_data.size();
+		elem.props.resize(0);
+		elems.push_back( elem );
+		describe_other_properties( other_elems->other_list[i].other_props , offsetof( OtherData , other_props ) );
+	}
+}
+
+
+/******************************************************************************
+Write out the "other" elements specified for this PLY file.
+******************************************************************************/
+
+void PlyFile::put_other_elements( void )
+{
+	/* nothing to write if no "other" elements are attached to this file */
+	if( other_elems==NULL ) return;
+
+	/* the "other" elements are written in list order */
+	for( int e=0 ; e<other_elems->other_list.size() ; e++ )
+	{
+		OtherElem &entry = other_elems->other_list[e];
+
+		/* set up for writing this element before putting its instances */
+		put_element_setup( entry.elem_name );
+
+		/* write out each instance of the current element */
+		for( int n=0 ; n<entry.other_data.size() ; n++ ) put_element( (void *)&entry.other_data[n] );
+	}
+}
+
+/*******************/
+/*  Miscellaneous  */
+/*******************/
+
+/******************************************************************************
+Report the version number and file type of a PlyFile.
+
+Exit:
+version - version of the file
+file_type - PLY_ASCII, PLY_BINARY_BE, or PLY_BINARY_LE
+******************************************************************************/
+
+void PlyFile::get_info( float &version, int &file_type ){ version = this->version; file_type = this->file_type; }
+
+/******************************************************************************
+Find an element from the element list of a given PLY object.
+
+Entry:
+element - name of element we're looking for
+
+Exit:
+returns the element, or NULL if not found
+******************************************************************************/
+
+PlyElement *PlyFile::find_element( const std::string &element )
+{
+	PlyElement *found = NULL;	/* stays NULL unless some element's name matches */
+	for( int e=0 ; e<elems.size() && !found ; e++ ) if( elems[e].name==element ) found = &elems[e];
+	return found;
+}
+
+
+/******************************************************************************
+Find a property in the list of properties of a given element.
+
+Entry:
+elem      - pointer to element in which we want to find the property
+prop_name - name of property to find
+
+Exit:
+index - index to position in list
+returns a pointer to the property, or NULL if not found
+******************************************************************************/
+
+PlyProperty *PlyElement::find_property( const std::string &prop_name , int &index )
+{
+	index = -1;	/* stays -1 when no property carries this name */
+	for( int p=0 ; p<props.size() && index<0 ; p++ ) if( props[p].prop.name==prop_name ) index = p;
+	return index<0 ? NULL : &props[index].prop;
+}
+
+/******************************************************************************
+Read an element from an ascii file.
+
+Entry:
+elem_ptr - pointer to element
+******************************************************************************/
+
+void PlyFile::_ascii_get_element( void *elem_ptr )
+{
+	/* Reads one line of text from fp and parses it as one instance of the */
+	/* current element (which_elem).  Properties flagged for storing are */
+	/* written into elem_ptr at their recorded offsets.  If the element has */
+	/* other_props, the remaining properties go into a malloc'ed block whose */
+	/* pointer is placed in elem_ptr at elem->other_offset.  Each non-empty */
+	/* list gets its own malloc'ed item array.  Nothing in this function */
+	/* frees those allocations.  A line with too few values, a bad list */
+	/* count, or a failed allocation is reported through ERROR_OUT. */
+
+	int int_val;
+	unsigned int uint_val;
+	double double_val;
+	char *orig_line;
+	char *other_data = NULL;
+
+	/* the kind of element we're reading currently */
+	PlyElement *elem = which_elem;
+
+	/* do we need to setup for other_props? */
+	int other_flag = ( elem->other_offset!=NO_OTHER_PROPS );
+	if( other_flag )
+	{
+		/* make room for other_props (malloc may return NULL for a size of zero) */
+		other_data = (char *)malloc( elem->other_size );
+		if( other_data==NULL && elem->other_size>0 ){ ERROR_OUT( "Failed to allocate other properties" ); return; }
+		/* store pointer in user's structure to the other_props */
+		*(char **)( (char *)elem_ptr + elem->other_offset ) = other_data;
+	}
+
+	/* read in the element: one line of text, split into words */
+	std::vector< std::string > words = get_words( fp , &orig_line );
+	if( words.empty() ){ ERROR_OUT( "Unexpected end of file" ); return; }
+
+	/* index of the next unread word; the line comes from the file and may */
+	/* hold too few values, so it is checked before every read below */
+	size_t which_word = 0;
+
+	for( size_t j=0 ; j<elem->props.size() ; j++ )
+	{
+		PlyProperty &prop = elem->props[j].prop;
+		int store_it = ( elem->props[j].store | other_flag );
+
+		/* store either in the user's structure or in other_props */
+		void *elem_data = elem->props[j].store ? elem_ptr : (void *)other_data;
+
+		/* both branches start by reading one word (the count or the value) */
+		if( which_word>=words.size() ){ ERROR_OUT( "Too few values on line" ); return; }
+
+		if( prop.is_list )       /* a list */
+		{
+			/* get and store the number of items in the list */
+			get_ascii_item( words[which_word++] , prop.count_external , int_val , uint_val , double_val );
+			if( store_it )
+				store_item( (char *)elem_data + prop.count_offset , prop.count_internal , int_val , uint_val , double_val );
+
+			/* the count comes from the file: reject a negative count, or one */
+			/* that claims more items than there are words left on the line */
+			int list_count = int_val;
+			if( list_count<0 || (size_t)list_count>words.size()-which_word )
+			{
+				ERROR_OUT( "Bad list count: %d" , list_count );
+				return;
+			}
+
+			/* allocate space for an array of items and store a ptr to the array */
+			int item_size = 0;
+			char *item = NULL;
+			if( store_it )
+			{
+				item_size = ply_type_size[ prop.internal_type ];
+				if( list_count>0 )
+				{
+					item = (char *)malloc( (size_t)item_size * (size_t)list_count );
+					if( item==NULL ){ ERROR_OUT( "Failed to allocate list of %d items" , list_count ); return; }
+				}
+				/* NULL for an empty list */
+				*(char **)( (char *)elem_data + prop.offset ) = item;
+			}
+
+			/* read items and store them into the array */
+			for( int k=0 ; k<list_count ; k++ )
+			{
+				get_ascii_item( words[which_word++] , prop.external_type , int_val , uint_val , double_val );
+				if( store_it )
+				{
+					store_item( item , prop.internal_type , int_val , uint_val , double_val );
+					item += item_size;
+				}
+			}
+		}
+		else                     /* not a list */
+		{
+			get_ascii_item( words[which_word++] , prop.external_type , int_val , uint_val , double_val );
+			if( store_it )
+				store_item( (char *)elem_data + prop.offset , prop.internal_type , int_val , uint_val , double_val );
+		}
+	}
+}
+
 /******************************************************************************
 Read an element from a binary file.
 
- Entry:
- plyfile  - file identifier
- elem_ptr - pointer to an element
- ******************************************************************************/
- 
- void binary_get_element(PlyFile *plyfile, char *elem_ptr)
- {
-	 int j,k;
-	 PlyElement *elem;
-	 PlyProperty *prop;
-	 FILE *fp = plyfile->fp;
-	 char *elem_data,*item=NULL;
-	 char *item_ptr;
-	 int item_size;
-	 int int_val;
-	 unsigned int uint_val;
-	 double double_val;
-	 int list_count;
-	 int store_it;
-	 char **store_array;
-	 char *other_data=NULL;
-	 int other_flag;
-	 
-	 /* the kind of element we're reading currently */
-	 elem = plyfile->which_elem;
-	 
-	 /* do we need to setup for other_props? */
-	 
-	 if (elem->other_offset != NO_OTHER_PROPS) {
-		 char **ptr;
-		 other_flag = 1;
-		 /* make room for other_props */
-		 other_data = (char *) myalloc (elem->other_size);
-		 /* store pointer in user's structure to the other_props */
-		 ptr = (char **) (elem_ptr + elem->other_offset);
-		 *ptr = other_data;
-	 }
-	 else
-		 other_flag = 0;
-	 
-	 /* read in a number of elements */
-	 
-	 for (j = 0; j < elem->nprops; j++) {
-		 
-		 prop = elem->props[j];
-		 store_it = (elem->store_prop[j] | other_flag);
-		 
-		 /* store either in the user's structure or in other_props */
-		 if (elem->store_prop[j])
-			 elem_data = elem_ptr;
-		 else
-			 elem_data = other_data;
-		 
-		 if (prop->is_list) {       /* a list */
-			 
-			 /* get and store the number of items in the list */
-			 get_binary_item (fp, plyfile->file_type, prop->count_external,
-				 &int_val, &uint_val, &double_val);
-			 if (store_it) {
-				 item = elem_data + prop->count_offset;
-				 store_item(item, prop->count_internal, int_val, uint_val, double_val);
-			 }
-			 
-			 /* allocate space for an array of items and store a ptr to the array */
-			 list_count = int_val;
-			 item_size = ply_type_size[prop->internal_type];
-			 store_array = (char **) (elem_data + prop->offset);
-			 if (list_count == 0) {
-				 if (store_it)
-					 *store_array = NULL;
-			 }
-			 else {
-				 if (store_it) {
-					 item_ptr = (char *) myalloc (sizeof (char) * item_size * list_count);
-					 item = item_ptr;
-					 *store_array = item_ptr;
-				 }
-				 
-				 /* read items and store them into the array */
-				 for (k = 0; k < list_count; k++) {
-					 get_binary_item (fp, plyfile->file_type, prop->external_type,
-						 &int_val, &uint_val, &double_val);
-					 if (store_it) {
-						 store_item (item, prop->internal_type,
-							 int_val, uint_val, double_val);
-						 item += item_size;
-					 }
-				 }
-			 }
-			 
-		 }
-		 else {                     /* not a list */
-			 get_binary_item (fp, plyfile->file_type, prop->external_type,
-				 &int_val, &uint_val, &double_val);
-			 if (store_it) {
-				 item = elem_data + prop->offset;
-				 store_item (item, prop->internal_type, int_val, uint_val, double_val);
-			 }
-		 }
-		 
-	 }
- }
- 
- 
- /******************************************************************************
- Write to a file the word that represents a PLY data type.
- 
-  Entry:
-  fp   - file pointer
-  code - code for type
-  ******************************************************************************/
-  
-  void write_scalar_type (FILE *fp, int code)
-  {
-	  /* make sure this is a valid code */
-	  
-	  if (code <= PLY_START_TYPE || code >= PLY_END_TYPE) {
-		  fprintf (stderr, "write_scalar_type: bad data code = %d\n", code);
-		  exit (-1);
-	  }
-	  
-	  /* write the code to a file */
-	  
-	  fprintf (fp, "%s", type_names[code]);
-  }
-  
-  /******************************************************************************
-  Reverse the order in an array of bytes.  This is the conversion from big
-  endian to little endian and vice versa
-  
-   Entry:
-   bytes     - array of bytes to reverse (in place)
-   num_bytes - number of bytes in array
-  ******************************************************************************/
-  
-  void swap_bytes(char *bytes, int num_bytes)
-  {
-	  int i;
-	  char temp;
-	  
-	  for (i=0; i < num_bytes/2; i++)
-	  {
-		  temp = bytes[i];
-		  bytes[i] = bytes[(num_bytes-1)-i];
-		  bytes[(num_bytes-1)-i] = temp;
-	  }
-  }
-  
-  /******************************************************************************
-  Find out if this machine is big endian or little endian
-  
-   Exit:
-   set global variable, native_binary_type =
-   either PLY_BINARY_BE or PLY_BINARY_LE
-   
-  ******************************************************************************/
-  
-  void get_native_binary_type()
-  {
-	  endian_test_type test;
-	  
-	  test.int_value = 0;
-	  test.int_value = 1;
-	  if (test.byte_values[0] == 1)
-		  native_binary_type = PLY_BINARY_LE;
-	  else if (test.byte_values[sizeof(int)-1] == 1)
-		  native_binary_type = PLY_BINARY_BE;
-	  else
-	  {
-		  fprintf(stderr, "ply: Couldn't determine machine endianness.\n");
-		  fprintf(stderr, "ply: Exiting...\n");
-		  exit(1);
-	  }
-  }
-  
-  /******************************************************************************
-  Verify that all the native types are the sizes we need
-  
-   
-  ******************************************************************************/
-  
-  void check_types()
-  {
-	  if ((ply_type_size[PLY_CHAR] != sizeof(char)) ||
-		  (ply_type_size[PLY_SHORT] != sizeof(short)) ||	
-		  (ply_type_size[PLY_INT] != sizeof(int)) ||	
-		  (ply_type_size[PLY_UCHAR] != sizeof(unsigned char)) ||	
-		  (ply_type_size[PLY_USHORT] != sizeof(unsigned short)) ||	
-		  (ply_type_size[PLY_UINT] != sizeof(unsigned int)) ||	
-		  (ply_type_size[PLY_FLOAT] != sizeof(float)) ||	
-		  (ply_type_size[PLY_DOUBLE] != sizeof(double)))
-	  {
-		  fprintf(stderr, "ply: Type sizes do not match built-in types\n");
-		  fprintf(stderr, "ply: Exiting...\n");
-		  exit(1);
-	  }
-	  
-	  types_checked = 1;
-  }
-  
-  /******************************************************************************
-  Get a text line from a file and break it up into words.
-  
-   IMPORTANT: The calling routine call "free" on the returned pointer once
-   finished with it.
-   
-	Entry:
-	fp - file to read from
-	
-	 Exit:
-	 nwords    - number of words returned
-	 orig_line - the original line of characters
-	 returns a list of words from the line, or NULL if end-of-file
-  ******************************************************************************/
-  
-  char **get_words(FILE *fp, int *nwords, char **orig_line)
-  {
+Entry:
+elem_ptr - pointer to an element
+******************************************************************************/
+
+void PlyFile::_binary_get_element( void *elem_ptr )
+{
+	/* Reads the binary encoding of one instance of the current element */
+	/* (which_elem) from fp.  Properties flagged for storing are written into */
+	/* elem_ptr at their recorded offsets.  If the element has other_props, */
+	/* the remaining properties go into a malloc'ed block whose pointer is */
+	/* placed in elem_ptr at elem->other_offset.  Each non-empty list gets */
+	/* its own malloc'ed item array.  Nothing in this function frees those */
+	/* allocations.  A negative list count or a failed allocation is */
+	/* reported through ERROR_OUT. */
+
+	int int_val;
+	unsigned int uint_val;
+	double double_val;
+	char *other_data = NULL;
+
+	/* the kind of element we're reading currently */
+	PlyElement *elem = which_elem;
+
+	/* do we need to setup for other_props? */
+	int other_flag = ( elem->other_offset!=NO_OTHER_PROPS );
+	if( other_flag )
+	{
+		/* make room for other_props (malloc may return NULL for a size of zero) */
+		other_data = (char *)malloc( elem->other_size );
+		if( other_data==NULL && elem->other_size>0 ){ ERROR_OUT( "Failed to allocate other properties" ); return; }
+
+		/* store pointer in user's structure to the other_props */
+		*(char **)( (char *)elem_ptr + elem->other_offset ) = other_data;
+	}
+
+	/* read in the properties, in the order they appear in the element's list */
+	for( size_t j=0 ; j<elem->props.size() ; j++ )
+	{
+		PlyProperty &prop = elem->props[j].prop;
+		int store_it = ( elem->props[j].store | other_flag );
+
+		/* store either in the user's structure or in other_props */
+		void *elem_data = elem->props[j].store ? elem_ptr : (void *)other_data;
+
+		if( prop.is_list )       /* a list */
+		{
+			/* get and store the number of items in the list */
+			get_binary_item( fp , file_type , prop.count_external , int_val , uint_val , double_val );
+			if( store_it )
+				store_item( (char *)elem_data + prop.count_offset , prop.count_internal , int_val , uint_val , double_val );
+
+			/* the count comes from the file: a negative value cannot be a */
+			/* list length, and must not reach the size computation below */
+			int list_count = int_val;
+			if( list_count<0 )
+			{
+				ERROR_OUT( "Bad list count: %d" , list_count );
+				return;
+			}
+
+			/* allocate space for an array of items and store a ptr to the array */
+			int item_size = 0;
+			char *item = NULL;
+			if( store_it )
+			{
+				item_size = ply_type_size[ prop.internal_type ];
+				if( list_count>0 )
+				{
+					item = (char *)malloc( (size_t)item_size * (size_t)list_count );
+					if( item==NULL ){ ERROR_OUT( "Failed to allocate list of %d items" , list_count ); return; }
+				}
+
+				/* NULL for an empty list */
+				*(char **)( (char *)elem_data + prop.offset ) = item;
+			}
+
+			/* read items and store them into the array */
+			for( int k=0 ; k<list_count ; k++ )
+			{
+				get_binary_item( fp , file_type , prop.external_type , int_val , uint_val , double_val );
+				if( store_it )
+				{
+					store_item( item , prop.internal_type , int_val , uint_val , double_val );
+					item += item_size;
+				}
+			}
+		}
+		else                     /* not a list */
+		{
+			/* the value is always read from the file, and stored only if wanted */
+			get_binary_item( fp , file_type , prop.external_type , int_val , uint_val , double_val );
+			if( store_it )
+				store_item( (char *)elem_data + prop.offset , prop.internal_type , int_val , uint_val , double_val );
+		}
+	}
+}
+
+
+/******************************************************************************
+Write to a file the word that represents a PLY data type.
+
+Entry:
+fp   - file pointer
+code - code for type
+******************************************************************************/
+
+void write_scalar_type( FILE *fp , int code )
+{
+	/* valid codes lie strictly between the two sentinel values */
+	if( !( code>PLY_START_TYPE && code<PLY_END_TYPE ) ) ERROR_OUT( "Bad data code = %d" , code );
+
+	/* emit the type's name, with nothing before or after it */
+	fputs( type_names[code] , fp );
+}
+
+/******************************************************************************
+Reverse the order in an array of bytes.  This is the conversion from big
+endian to little endian and vice versa
+
+Entry:
+bytes     - array of bytes to reverse (in place)
+num_bytes - number of bytes in array
+******************************************************************************/
+
+void swap_bytes( void *bytes , int num_bytes )
+{
+	char *data = (char *)bytes;
+	/* exchange each byte of the first half with its mirror image (an odd middle byte stays put) */
+	for( int lo=0 , pairs=num_bytes/2 ; lo<pairs ; lo++ )
+	{
+		int hi = num_bytes-1-lo;
+		char held = data[lo];
+		data[lo] = data[hi] , data[hi] = held;
+	}
+}
+
+/******************************************************************************
+Find out if this machine is big endian or little endian
+
+Exit:
+set global variable, native_binary_type =
+either PLY_BINARY_BE or PLY_BINARY_LE
+
+******************************************************************************/
+
+void get_native_binary_type()
+{
+	/* store the integer 1, then see which end of its bytes received it */
+	endian_test_type probe;
+	probe.int_value = 1;
+
+	if( probe.byte_values[0]==1 ) native_binary_type = PLY_BINARY_LE;
+	else if( probe.byte_values[sizeof(int)-1]==1 ) native_binary_type = PLY_BINARY_BE;
+	else ERROR_OUT( "Couldn't determine machine endianness" );
+}
+
+/******************************************************************************
+Verify that all the native types are the sizes we need
+
+
+******************************************************************************/
+
+void check_types()
+{
+	bool sizes_match =	/* every table entry must equal the size of its built-in counterpart */
+		ply_type_size[PLY_CHAR]==sizeof(char) &&
+		ply_type_size[PLY_SHORT]==sizeof(short) &&
+		ply_type_size[PLY_INT]==sizeof(int) &&
+		ply_type_size[PLY_UCHAR]==sizeof(unsigned char) &&
+		ply_type_size[PLY_USHORT]==sizeof(unsigned short) &&
+		ply_type_size[PLY_UINT]==sizeof(unsigned int) &&
+		ply_type_size[PLY_FLOAT]==sizeof(float) &&
+		ply_type_size[PLY_DOUBLE]==sizeof(double);
+	if( !sizes_match ) ERROR_OUT( "Type sizes do not match built-in types" );
+	types_checked = 1;
+}
+
+/******************************************************************************
+Get a text line from a file and break it up into words.
+
+The words are returned by value in a vector, so the calling routine has
+nothing to "free" once finished with them.
+
+Entry:
+fp - file to read from
+
+Exit:
+orig_line - the original line of characters
+returns a list of words from the line; the number of words is its size()
+(callers in this file treat an empty list as end-of-file)
+******************************************************************************/
+
+std::vector< std::string > get_words( FILE *fp , char **orig_line )
+{
 #define BIG_STRING 4096
-	  static char str[BIG_STRING];
-	  static char str_copy[BIG_STRING];
-	  char **words;
-	  int max_words = 10;
-	  int num_words = 0;
-	  char *ptr,*ptr2;
-	  char *result;
-	  
-	  words = (char **) myalloc (sizeof (char *) * max_words);
-	  
-	  /* read in a line */
-	  result = fgets (str, BIG_STRING, fp);
-	  if (result == NULL) {
-	     free(words);
-		  *nwords = 0;
-		  *orig_line = NULL;
-		  return (NULL);
-	  }
-	  /* convert line-feed and tabs into spaces */
-	  /* (this guarentees that there will be a space before the */
-	  /*  null character at the end of the string) */
-	  
-	  str[BIG_STRING-2] = ' ';
-	  str[BIG_STRING-1] = '\0';
-	  
-	  for (ptr = str, ptr2 = str_copy; *ptr != '\0'; ptr++, ptr2++) {
-		  *ptr2 = *ptr;
-		  // Added line here to manage carriage returns
-		  if (*ptr == '\t' || *ptr == '\r') {
-			  *ptr = ' ';
-			  *ptr2 = ' ';
-		  }
-		  else if (*ptr == '\n') {
-			  *ptr = ' ';
-			  *ptr2 = '\0';
-			  break;
-		  }
-	  }
-	  
-	  /* find the words in the line */
-	  
-	  ptr = str;
-	  while (*ptr != '\0') {
-		  
-		  /* jump over leading spaces */
-		  while (*ptr == ' ')
-			  ptr++;
-		  
-		  /* break if we reach the end */
-		  if (*ptr == '\0')
-			  break;
-		  
-		  /* save pointer to beginning of word */
-		  if (num_words >= max_words) {
-			  max_words += 10;
-			  words = (char **) realloc (words, sizeof (char *) * max_words);
-		  }
-		  words[num_words++] = ptr;
-		  
-		  /* jump over non-spaces */
-		  while (*ptr != ' ')
-			  ptr++;
-		  
-		  /* place a null character here to mark the end of the word */
-		  *ptr++ = '\0';
-	  }
-	  
-	  /* return the list of words */
-	  *nwords = num_words;
-	  *orig_line = str_copy;
-	  return (words);
-  }
-  
-  
-  /******************************************************************************
-  Return the value of an item, given a pointer to it and its type.
-  
-   Entry:
-   item - pointer to item
-   type - data type that "item" points to
-   
-	Exit:
-	returns a double-precision float that contains the value of the item
-  ******************************************************************************/
-  
-  double get_item_value(char *item, int type)
-  {
-	  unsigned char *puchar;
-	  char *pchar;
-	  short int *pshort;
-	  unsigned short int *pushort;
-	  int *pint;
-	  unsigned int *puint;
-	  float *pfloat;
-	  double *pdouble;
-	  int int_value;
-	  unsigned int uint_value;
-	  double double_value;
-	  
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  pchar = (char *) item;
-		  int_value = *pchar;
-		  return ((double) int_value);
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-		  puchar = (unsigned char *) item;
-		  int_value = *puchar;
-		  return ((double) int_value);
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  pshort = (short int *) item;
-		  int_value = *pshort;
-		  return ((double) int_value);
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-		  pushort = (unsigned short int *) item;
-		  int_value = *pushort;
-		  return ((double) int_value);
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  pint = (int *) item;
-		  int_value = *pint;
-		  return ((double) int_value);
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  puint = (unsigned int *) item;
-		  uint_value = *puint;
-		  return ((double) uint_value);
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  pfloat = (float *) item;
-		  double_value = *pfloat;
-		  return (double_value);
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  pdouble = (double *) item;
-		  double_value = *pdouble;
-		  return (double_value);
-	  default:
-		  fprintf (stderr, "get_item_value: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Write out an item to a file as raw binary bytes.
-  
-   Entry:
-   fp         - file to write to
-   int_val    - integer version of item
-   uint_val   - unsigned integer version of item
-   double_val - double-precision float version of item
-   type       - data type to write out
-  ******************************************************************************/
-  
-  void write_binary_item(
-	  FILE *fp,
-	  int file_type,
-	  int int_val,
-	  unsigned int uint_val,
-	  double double_val,
-	  int type
-	  )
-  {
-	  unsigned char uchar_val;
-	  char char_val;
-	  unsigned short ushort_val;
-	  short short_val;
-	  float float_val;
-	  void  *value;
-	  
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  char_val = char(int_val);
-		  value = &char_val;
-		  break;
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  short_val = short(int_val);
-		  value = &short_val;
-		  break;
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  value = &int_val;
-		  break;
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-	    uchar_val = (unsigned char)(uint_val);
-		  value = &uchar_val;
-		  break;
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-	    ushort_val = (unsigned short)(uint_val);
-		  value = &ushort_val;
-		  break;
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  value = &uint_val;
-		  break;
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  float_val = (float)double_val;
-		  value = &float_val;
-		  break;
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  value = &double_val;
-		  break;
-	  default:
-		  fprintf (stderr, "write_binary_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-
-	  
-	  if ((file_type != native_binary_type) && (ply_type_size[type] > 1))
-		  swap_bytes((char *)value, ply_type_size[type]);
-	  
-	  if (fwrite (value, ply_type_size[type], 1, fp) != 1)
-	  {
-		  fprintf(stderr, "PLY ERROR: fwrite() failed -- aborting.\n");
-		  exit(1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Write out an item to a file as ascii characters.
-  
-   Entry:
-   fp         - file to write to
-   int_val    - integer version of item
-   uint_val   - unsigned integer version of item
-   double_val - double-precision float version of item
-   type       - data type to write out
-  ******************************************************************************/
-  
-  void write_ascii_item(
-	  FILE *fp,
-	  int int_val,
-	  unsigned int uint_val,
-	  double double_val,
-	  int type
-	  )
-  {
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  if (fprintf (fp, "%d ", int_val) <= 0)
-		  {
-			  fprintf(stderr, "PLY ERROR: fprintf() failed -- aborting.\n");
-			  exit(1);
-		  }
-		  break;
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-
-		  if (fprintf (fp, "%u ", uint_val) <= 0)
-		  {
-			  fprintf(stderr, "PLY ERROR: fprintf() failed -- aborting.\n");
-			  exit(1);
-		  }
-		  break;
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-	  if (fprintf (fp, "%g ", double_val) <= 0)
-		  {
-			  fprintf(stderr, "PLY ERROR: fprintf() failed -- aborting.\n");
-			  exit(1);
-		  }
-		  break;
-	  default:
-		  fprintf (stderr, "write_ascii_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Write out an item to a file as ascii characters.
-  
-   Entry:
-   fp   - file to write to
-   item - pointer to item to write
-   type - data type that "item" points to
-   
-	Exit:
-	returns a double-precision float that contains the value of the written item
-  ******************************************************************************/
-  
-  double old_write_ascii_item(FILE *fp, char *item, int type)
-  {
-	  unsigned char *puchar;
-	  char *pchar;
-	  short int *pshort;
-	  unsigned short int *pushort;
-	  int *pint;
-	  unsigned int *puint;
-	  float *pfloat;
-	  double *pdouble;
-	  int int_value;
-	  unsigned int uint_value;
-	  double double_value;
-	  
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  pchar = (char *) item;
-		  int_value = *pchar;
-		  fprintf (fp, "%d ", int_value);
-		  return ((double) int_value);
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-		  puchar = (unsigned char *) item;
-		  int_value = *puchar;
-		  fprintf (fp, "%d ", int_value);
-		  return ((double) int_value);
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  pshort = (short int *) item;
-		  int_value = *pshort;
-		  fprintf (fp, "%d ", int_value);
-		  return ((double) int_value);
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-		  pushort = (unsigned short int *) item;
-		  int_value = *pushort;
-		  fprintf (fp, "%d ", int_value);
-		  return ((double) int_value);
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  pint = (int *) item;
-		  int_value = *pint;
-		  fprintf (fp, "%d ", int_value);
-		  return ((double) int_value);
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  puint = (unsigned int *) item;
-		  uint_value = *puint;
-		  fprintf (fp, "%u ", uint_value);
-		  return ((double) uint_value);
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  pfloat = (float *) item;
-		  double_value = *pfloat;
-		  fprintf (fp, "%g ", double_value);
-		  return (double_value);
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  pdouble = (double *) item;
-		  double_value = *pdouble;
-		  fprintf (fp, "%g ", double_value);
-		  return (double_value);
-	  default:
-		  fprintf (stderr, "old_write_ascii_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Get the value of an item that is in memory, and place the result
-  into an integer, an unsigned integer and a double.
-  
-   Entry:
-   ptr  - pointer to the item
-   type - data type supposedly in the item
-   
-	Exit:
-	int_val    - integer value
-	uint_val   - unsigned integer value
-	double_val - double-precision floating point value
-  ******************************************************************************/
-  
-  void get_stored_item(
-	  void *ptr,
-	  int type,
-	  int *int_val,
-	  unsigned int *uint_val,
-	  double *double_val
-	  )
-  {
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  *int_val = *((char *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-		  *uint_val = *((unsigned char *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  *int_val = *((short int *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-		  *uint_val = *((unsigned short int *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  *int_val = *((int *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  *uint_val = *((unsigned int *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  *double_val = *((float *) ptr);
-		  *int_val = (int) *double_val;
-		  *uint_val = (unsigned int) *double_val;
-		  break;
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  *double_val = *((double *) ptr);
-		  *int_val = (int) *double_val;
-		  *uint_val = (unsigned int) *double_val;
-		  break;
-	  default:
-		  fprintf (stderr, "get_stored_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Get the value of an item from a binary file, and place the result
-  into an integer, an unsigned integer and a double.
-  
-   Entry:
-   fp   - file to get item from
-   type - data type supposedly in the word
-   
-	Exit:
-	int_val    - integer value
-	uint_val   - unsigned integer value
-	double_val - double-precision floating point value
-  ******************************************************************************/
-  
-  void get_binary_item(
-	  FILE *fp,
-	  int file_type,
-	  int type,
-	  int *int_val,
-	  unsigned int *uint_val,
-	  double *double_val
-	  )
-  {
-	  char c[8];
-	  void *ptr;
-	  
-	  ptr = (void *) c;
-	  
-	  if (fread (ptr, ply_type_size[type], 1, fp) != 1)
-	  {
-		  fprintf(stderr, "PLY ERROR: fread() failed -- aborting.\n");
-		  exit(1);
-	  }
-	  
-	  
-	  if ((file_type != native_binary_type) && (ply_type_size[type] > 1))
-		  swap_bytes((char *)ptr, ply_type_size[type]);
-	  
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  *int_val = *((char *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-		  *uint_val = *((unsigned char *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  *int_val = *((short int *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-		  *uint_val = *((unsigned short int *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  *int_val = *((int *) ptr);
-		  *uint_val = *int_val;
-		  *double_val = *int_val;
-		  break;
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  *uint_val = *((unsigned int *) ptr);
-		  *int_val = *uint_val;
-		  *double_val = *uint_val;
-		  break;
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  *double_val = *((float *) ptr);
-		  *int_val = (int) *double_val;
-		  *uint_val = (unsigned int) *double_val;
-		  break;
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  *double_val = *((double *) ptr);
-		  *int_val = (int) *double_val;
-		  *uint_val = (unsigned int) *double_val;
-		  break;
-	  default:
-		  fprintf (stderr, "get_binary_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Extract the value of an item from an ascii word, and place the result
-  into an integer, an unsigned integer and a double.
-  
-   Entry:
-   word - word to extract value from
-   type - data type supposedly in the word
-   
-	Exit:
-	int_val    - integer value
-	uint_val   - unsigned integer value
-	double_val - double-precision floating point value
-  ******************************************************************************/
-  
-  void get_ascii_item(
-	  char *word,
-	  int type,
-	  int *int_val,
-	  unsigned int *uint_val,
-	  double *double_val
-	  )
-  {
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  *int_val = atoi (word);
-		  *uint_val = (unsigned int) *int_val;
-		  *double_val = (double) *int_val;
-		  break;
-		  
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  *uint_val = strtol (word, (char **) NULL, 10);
-		  *int_val = (int) *uint_val;
-		  *double_val = (double) *uint_val;
-		  break;
-		  
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  *double_val = atof (word);
-		  *int_val = (int) *double_val;
-		  *uint_val = (unsigned int) *double_val;
-		  break;
-		  
-	  default:
-		  fprintf (stderr, "get_ascii_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Store a value into a place being pointed to, guided by a data type.
-  
-   Entry:
-   item       - place to store value
-   type       - data type
-   int_val    - integer version of value
-   uint_val   - unsigned integer version of value
-   double_val - double version of value
-   
-	Exit:
-	item - pointer to stored value
-  ******************************************************************************/
-  
-  void store_item (
-	  char *item,
-	  int type,
-	  int int_val,
-	  unsigned int uint_val,
-	  double double_val
-	  )
-  {
-	  unsigned char *puchar;
-	  short int *pshort;
-	  unsigned short int *pushort;
-	  int *pint;
-	  unsigned int *puint;
-	  float *pfloat;
-	  double *pdouble;
-
-
-	  switch (type) {
-	  case PLY_CHAR:
-	  case PLY_INT_8:
-		  *item = char(int_val);
-		  break;
-	  case PLY_UCHAR:
-	  case PLY_UINT_8:
-		  puchar = (unsigned char *) item;
-		  *puchar = (unsigned char)(uint_val);
-		  break;
-	  case PLY_SHORT:
-	  case PLY_INT_16:
-		  pshort = (short *) item;
-		  *pshort = short(int_val);
-		  break;
-	  case PLY_USHORT:
-	  case PLY_UINT_16:
-		  pushort = (unsigned short *) item;
-		  *pushort = (unsigned short)(uint_val);
-		  break;
-	  case PLY_INT:
-	  case PLY_INT_32:
-		  pint = (int *) item;
-		  *pint = int_val;
-		  break;
-	  case PLY_UINT:
-	  case PLY_UINT_32:
-		  puint = (unsigned int *) item;
-		  *puint = uint_val;
-		  break;
-	  case PLY_FLOAT:
-	  case PLY_FLOAT_32:
-		  pfloat = (float *) item;
-		  *pfloat = (float)double_val;
-		  break;
-	  case PLY_DOUBLE:
-	  case PLY_FLOAT_64:
-		  pdouble = (double *) item;
-		  *pdouble = double_val;
-		  break;
-	  default:
-		  fprintf (stderr, "store_item: bad type = %d\n", type);
-		  exit (-1);
-	  }
-  }
-  
-  
-  /******************************************************************************
-  Add an element to a PLY file descriptor.
-  
-   Entry:
-   plyfile - PLY file descriptor
-   words   - list of words describing the element
-   nwords  - number of words in the list
-  ******************************************************************************/
-  
-  void add_element (PlyFile *plyfile, char **words)
-  {
-	  PlyElement *elem;
-	  
-	  /* create the new element */
-	  elem = (PlyElement *) myalloc (sizeof (PlyElement));
-	  elem->name = _strdup (words[1]);
-	  elem->num = atoi (words[2]);
-	  elem->nprops = 0;
-	  
-	  /* make room for new element in the object's list of elements */
-	  if (plyfile->nelems == 0)
-		  plyfile->elems = (PlyElement **) myalloc (sizeof (PlyElement *));
-	  else
-		  plyfile->elems = (PlyElement **) realloc (plyfile->elems,
-		  sizeof (PlyElement *) * (plyfile->nelems + 1));
-	  
-	  /* add the new element to the object's list */
-	  plyfile->elems[plyfile->nelems] = elem;
-	  plyfile->nelems++;
-  }
-  
-  
-  /******************************************************************************
-  Return the type of a property, given the name of the property.
-  
-   Entry:
-   name - name of property type
-   
-	Exit:
-	returns integer code for property, or 0 if not found
-  ******************************************************************************/
-  
-  int get_prop_type(char *type_name)
-  {
-	  int i;
-	  
-	  for (i = PLY_START_TYPE + 1; i < PLY_END_TYPE; i++)
-		  if (equal_strings (type_name, type_names[i]))
-			  return (i);
-		  
-		  /* if we get here, we didn't find the type */
-		  return (0);
-  }
-  
-  
-  /******************************************************************************
-  Add a property to a PLY file descriptor.
-  
-   Entry:
-   plyfile - PLY file descriptor
-   words   - list of words describing the property
-   nwords  - number of words in the list
-  ******************************************************************************/
-  
-  void add_property (PlyFile *plyfile, char **words)
-  {
-	  PlyProperty *prop;
-	  PlyElement *elem;
-	  
-	  /* create the new property */
-	  
-	  prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
-	  
-	  if (equal_strings (words[1], "list")) {       /* is a list */
-		  prop->count_external = get_prop_type (words[2]);
-		  prop->external_type = get_prop_type (words[3]);
-		  prop->name = _strdup (words[4]);
-		  prop->is_list = 1;
-	  }
-	  else {                                        /* not a list */
-		  prop->external_type = get_prop_type (words[1]);
-		  prop->name = _strdup (words[2]);
-		  prop->is_list = 0;
-	  }
-	  
-	  /* add this property to the list of properties of the current element */
-	  
-	  elem = plyfile->elems[plyfile->nelems - 1];
-	  
-	  if (elem->nprops == 0)
-		  elem->props = (PlyProperty **) myalloc (sizeof (PlyProperty *));
-	  else
-		  elem->props = (PlyProperty **) realloc (elem->props,
-		  sizeof (PlyProperty *) * (elem->nprops + 1));
-	  
-	  elem->props[elem->nprops] = prop;
-	  elem->nprops++;
-  }
-  
-  
-  /******************************************************************************
-  Add a comment to a PLY file descriptor.
-  
-   Entry:
-   plyfile - PLY file descriptor
-   line    - line containing comment
-  ******************************************************************************/
-  
-  void add_comment (PlyFile *plyfile, char *line)
-  {
-	  int i;
-	  
-	  /* skip over "comment" and leading spaces and tabs */
-	  i = 7;
-	  while (line[i] == ' ' || line[i] == '\t')
-		  i++;
-	  
-	  ply_put_comment (plyfile, &line[i]);
-  }
-  
-  
-  /******************************************************************************
-  Add a some object information to a PLY file descriptor.
-  
-   Entry:
-   plyfile - PLY file descriptor
-   line    - line containing text info
-  ******************************************************************************/
-  
-  void add_obj_info (PlyFile *plyfile, char *line)
-  {
-	  int i;
-	  
-	  /* skip over "obj_info" and leading spaces and tabs */
-	  i = 8;
-	  while (line[i] == ' ' || line[i] == '\t')
-		  i++;
-	  
-	  ply_put_obj_info (plyfile, &line[i]);
-  }
-  
-  
-  /******************************************************************************
-  Copy a property.
-  ******************************************************************************/
-  
-  void copy_property(PlyProperty *dest, PlyProperty *src)
-  {
-	  dest->name = _strdup (src->name);
-	  dest->external_type = src->external_type;
-	  dest->internal_type = src->internal_type;
-	  dest->offset = src->offset;
-	  
-	  dest->is_list = src->is_list;
-	  dest->count_external = src->count_external;
-	  dest->count_internal = src->count_internal;
-	  dest->count_offset = src->count_offset;
-  }
-  
-  
-  /******************************************************************************
-  Allocate some memory.
-  
-   Entry:
-   size  - amount of memory requested (in bytes)
-   lnum  - line number from which memory was requested
-   fname - file name from which memory was requested
-  ******************************************************************************/
-  
-  char *my_alloc(int size, int lnum, const char *fname)
-  {
-	  char *ptr;
-	  
-	  ptr = (char *) malloc (size);
-	  
-	  if (ptr == 0) {
-		  fprintf(stderr, "Memory allocation bombed on line %d in %s\n", lnum, fname);
-	  }
-	  
-	  return (ptr);
-  }
-  
+	static char str[BIG_STRING];
+	static char str_copy[BIG_STRING];
+	std::vector< std::string > words;
+	int max_words = 10;
+	int num_words = 0;
+	char *ptr , *ptr2;
+	char *result;
+
+	/* read in a line */
+	result = fgets( str , BIG_STRING , fp );
+	if( result==NULL )
+	{
+		*orig_line = NULL;
+		return words;
+	}
+	/* convert line-feed and tabs into spaces */
+	/* (this guarantees that there will be a space before the */
+	/*  null character at the end of the string) */
+
+	str[BIG_STRING-2] = ' ';
+	str[BIG_STRING-1] = '\0';
+
+	for( ptr=str , ptr2=str_copy ; *ptr!='\0' ; ptr++ , ptr2++ )
+	{
+		*ptr2 = *ptr;
+		// Added line here to manage carriage returns
+		if( *ptr == '\t' || *ptr == '\r' )
+		{
+			*ptr = ' ';
+			*ptr2 = ' ';
+		}
+		else if( *ptr=='\n' )
+		{
+			*ptr = ' ';
+			*ptr2 = '\0';
+			break;
+		}
+	}
+
+	/* find the words in the line */
+
+	ptr = str;
+	while( *ptr!='\0' )
+	{
+		/* jump over leading spaces */
+		while( *ptr==' ' ) ptr++;
+
+		/* break if we reach the end */
+		if( *ptr=='\0' ) break;
+
+		char *_ptr = ptr;
+
+		/* jump over non-spaces */
+		while( *ptr!=' ' ) ptr++;
+
+		/* place a null character here to mark the end of the word */
+		*ptr++ = '\0';
+
+		/* save pointer to beginning of word */
+		words.push_back( _ptr );
+	}
+
+	/* return the list of words */
+	*orig_line = str_copy;
+	return words;
+}
+
+/******************************************************************************
+Return the value of an item, given a pointer to it and its type.
+
+Entry:
+item - pointer to item
+type - data type that "item" points to
+
+Exit:
+returns a double-precision float that contains the value of the item
+******************************************************************************/
+
+double get_item_value( const void *item , int type )
+{
+	/* view the bytes at 'item' as the C type named by 'type' and widen that value to double */
+	switch( type ) {
+	case PLY_CHAR:
+	case PLY_INT_8:    return static_cast< double >( *static_cast< const char * >( item ) );
+	case PLY_UCHAR:
+	case PLY_UINT_8:   return static_cast< double >( *static_cast< const unsigned char * >( item ) );
+	case PLY_SHORT:
+	case PLY_INT_16:   return static_cast< double >( *static_cast< const short * >( item ) );
+	case PLY_USHORT:
+	case PLY_UINT_16:  return static_cast< double >( *static_cast< const unsigned short * >( item ) );
+	case PLY_INT:
+	case PLY_INT_32:   return static_cast< double >( *static_cast< const int * >( item ) );
+	case PLY_UINT:
+	case PLY_UINT_32:  return static_cast< double >( *static_cast< const unsigned int * >( item ) );
+	case PLY_FLOAT:
+	case PLY_FLOAT_32: return static_cast< double >( *static_cast< const float * >( item ) );
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64: return *static_cast< const double * >( item );
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+	return 0;   /* reached only if ERROR_OUT hands control back */
+}
+
+
+/******************************************************************************
+Write out an item to a file as raw binary bytes (multi-byte items go through swap_bytes() first when file_type differs from native_binary_type).
+
+Entry:
+fp         - file to write to
+int_val    - integer version of item
+uint_val   - unsigned integer version of item
+double_val - double-precision float version of item
+type       - data type to write out
+******************************************************************************/
+
+void write_binary_item( FILE *fp , int file_type , int int_val , unsigned int uint_val , double double_val , int type )
+{
+	/* narrowed copies of the value; 'bytes' is aimed at whichever object matches 'type' */
+	char as_char;
+	unsigned char as_uchar;
+	short as_short;
+	unsigned short as_ushort;
+	float as_float;
+	void *bytes;
+	switch( type ) {
+	case PLY_CHAR:
+	case PLY_INT_8:
+		as_char = (char)int_val;
+		bytes = &as_char;
+		break;
+	case PLY_UCHAR:
+	case PLY_UINT_8:
+		as_uchar = (unsigned char)uint_val;
+		bytes = &as_uchar;
+		break;
+	case PLY_SHORT:
+	case PLY_INT_16:
+		as_short = (short)int_val;
+		bytes = &as_short;
+		break;
+	case PLY_USHORT:
+	case PLY_UINT_16:
+		as_ushort = (unsigned short)uint_val;
+		bytes = &as_ushort;
+		break;
+	case PLY_INT:
+	case PLY_INT_32:
+		bytes = &int_val;
+		break;
+	case PLY_UINT:
+	case PLY_UINT_32:
+		bytes = &uint_val;
+		break;
+	case PLY_FLOAT:
+	case PLY_FLOAT_32:
+		as_float = (float)double_val;
+		bytes = &as_float;
+		break;
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64:
+		bytes = &double_val;
+		break;
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+
+	/* multi-byte items are passed to swap_bytes() first when the file's type is not the native binary type */
+	if( file_type!=native_binary_type && ply_type_size[type]>1 ) swap_bytes( static_cast< char * >( bytes ) , ply_type_size[type] );
+	if( fwrite( bytes , ply_type_size[type] , 1 , fp )!=1 ) ERROR_OUT( "Failed to write binary item" );
+}
+
+
+/******************************************************************************
+Write out an item to a file as ascii characters.
+
+Entry:
+fp         - file to write to
+int_val    - integer version of item
+uint_val   - unsigned integer version of item
+double_val - double-precision float version of item
+type       - data type to write out
+******************************************************************************/
+
+void write_ascii_item( FILE *fp , int int_val , unsigned int uint_val , double double_val , int type )
+{
+	int written = 0;   /* fprintf()'s result for the one item printed below */
+	switch( type )
+	{
+	/* signed integer types: print the int version */
+	case PLY_CHAR:  case PLY_INT_8:
+	case PLY_SHORT: case PLY_INT_16:
+	case PLY_INT:   case PLY_INT_32:
+		written = fprintf( fp , "%d " , int_val );
+		break;
+	/* unsigned integer types: print the unsigned version */
+	case PLY_UCHAR:  case PLY_UINT_8:
+	case PLY_USHORT: case PLY_UINT_16:
+	case PLY_UINT:   case PLY_UINT_32:
+		written = fprintf( fp , "%u " , uint_val );
+		break;
+	/* floating-point types: print the double version */
+	case PLY_FLOAT:  case PLY_FLOAT_32:
+	case PLY_DOUBLE: case PLY_FLOAT_64:
+		written = fprintf( fp , "%g " , double_val );
+		break;
+	default:
+		ERROR_OUT( "Bad type = %d" , type );
+		return;
+	}
+	/* a result of zero or less from fprintf() is treated as a failed write */
+	if( written<=0 ) ERROR_OUT( "fprintf() failed -- aborting" );
+}
+
+/******************************************************************************
+Get the value of an item that is in memory, and place the result
+into an integer, an unsigned integer and a double.
+
+Entry:
+ptr  - pointer to the item
+type - data type supposedly in the item
+
+Exit:
+int_val    - integer value
+uint_val   - unsigned integer value
+double_val - double-precision floating point value
+******************************************************************************/
+
+void get_stored_item( void *ptr , int type , int &int_val , unsigned int &uint_val , double &double_val )
+{
+	switch( type )
+	{
+	case PLY_CHAR:
+	case PLY_INT_8:
+		/* signed source: int_val is loaded first and the other two are converted from it */
+		uint_val = int_val = *static_cast< char * >( ptr );
+		double_val = int_val;
+		break;
+	case PLY_UCHAR:
+	case PLY_UINT_8:
+		/* unsigned source: uint_val is loaded first and the other two are converted from it */
+		int_val = uint_val = *static_cast< unsigned char * >( ptr );
+		double_val = uint_val;
+		break;
+	case PLY_SHORT:
+	case PLY_INT_16:
+		/* signed source */
+		uint_val = int_val = *static_cast< short * >( ptr );
+		double_val = int_val;
+		break;
+	case PLY_USHORT:
+	case PLY_UINT_16:
+		/* unsigned source */
+		int_val = uint_val = *static_cast< unsigned short * >( ptr );
+		double_val = uint_val;
+		break;
+	case PLY_INT:
+	case PLY_INT_32:
+		/* signed source */
+		uint_val = int_val = *static_cast< int * >( ptr );
+		double_val = int_val;
+		break;
+	case PLY_UINT:
+	case PLY_UINT_32:
+		/* unsigned source */
+		int_val = uint_val = *static_cast< unsigned int * >( ptr );
+		double_val = uint_val;
+		break;
+	case PLY_FLOAT:
+	case PLY_FLOAT_32:
+		double_val = *static_cast< float * >( ptr );
+		int_val = static_cast< int >( double_val );
+		uint_val = static_cast< unsigned int >( double_val );
+		break;
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64:
+		double_val = *static_cast< double * >( ptr );
+		int_val = static_cast< int >( double_val );
+		uint_val = static_cast< unsigned int >( double_val );
+		break;
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+}
+
+/******************************************************************************
+Read one item from a binary file and return it as an int, an unsigned int and a double.
+
+Entry:
+fp        - file to get item from
+file_type - binary type of the file; multi-byte items are passed to swap_bytes() when it differs from native_binary_type
+type      - data type of the item stored in the file
+
+Exit:
+int_val    - integer value
+uint_val   - unsigned integer value
+double_val - double-precision floating point value
+******************************************************************************/
+
+void get_binary_item( FILE *fp , int file_type , int type , int &int_val , unsigned int &uint_val , double &double_val )
+{
+	/* the union gives the byte buffer the alignment of a double, so the typed reads below are never misaligned */
+	union { char bytes[8]; double align; } buf;
+	void *ptr = ( void * )buf.bytes;
+
+	/* ply_type_size[type] bytes are read; this is assumed never to exceed sizeof( buf.bytes ) */
+	if( fread( ptr , ply_type_size[type] , 1 , fp )!=1 ) ERROR_OUT( "fread() failed -- aborting." );
+	if( ( file_type!=native_binary_type ) && ( ply_type_size[type]>1 ) ) swap_bytes( (char *)ptr , ply_type_size[type] );
+
+	switch( type )
+	{
+	case PLY_CHAR:
+	case PLY_INT_8:
+		int_val = *((char *) ptr);
+		uint_val = int_val;
+		double_val = int_val;
+		break;
+	case PLY_UCHAR:
+	case PLY_UINT_8:
+		uint_val = *((unsigned char *) ptr);
+		int_val = uint_val;
+		double_val = uint_val;
+		break;
+	case PLY_SHORT:
+	case PLY_INT_16:
+		int_val = *((short int *) ptr);
+		uint_val = int_val;
+		double_val = int_val;
+		break;
+	case PLY_USHORT:
+	case PLY_UINT_16:
+		uint_val = *((unsigned short int *) ptr);
+		int_val = uint_val;
+		double_val = uint_val;
+		break;
+	case PLY_INT:
+	case PLY_INT_32:
+		int_val = *((int *) ptr);
+		uint_val = int_val;
+		double_val = int_val;
+		break;
+	case PLY_UINT:
+	case PLY_UINT_32:
+		uint_val = *((unsigned int *) ptr);
+		int_val = uint_val;
+		double_val = uint_val;
+		break;
+	case PLY_FLOAT:
+	case PLY_FLOAT_32:
+		double_val = *((float *) ptr);
+		int_val = (int)double_val;
+		uint_val = (unsigned int)double_val;
+		break;
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64:
+		double_val = *((double *) ptr);
+		int_val = (int)double_val;
+		uint_val = (unsigned int)double_val;
+		break;
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+}
+
+/******************************************************************************
+Extract the value of an item from an ascii word, and place the result
+into an integer, an unsigned integer and a double.
+
+Entry:
+word - word to extract value from
+type - data type supposedly in the word
+
+Exit:
+int_val    - integer value
+uint_val   - unsigned integer value
+double_val - double-precision floating point value
+******************************************************************************/
+void get_ascii_item( const std::string &word , int type , int &int_val , unsigned int &uint_val , double &double_val )
+{
+	switch( type )
+	{
+	case PLY_CHAR:
+	case PLY_INT_8:
+	case PLY_UCHAR:
+	case PLY_UINT_8:
+	case PLY_SHORT:
+	case PLY_INT_16:
+	case PLY_USHORT:
+	case PLY_UINT_16:
+	case PLY_INT:
+	case PLY_INT_32:
+		int_val = atoi( word.c_str() );
+		uint_val = (unsigned int)int_val;
+		double_val = (double)int_val;
+		break;
+	/* parse as unsigned long: where long is 32 bits, strtol() clamps at LONG_MAX and corrupts the upper half of the unsigned range */
+	case PLY_UINT:
+	case PLY_UINT_32:
+		uint_val = (unsigned int)strtoul( word.c_str() , (char **)NULL , 10 );
+		int_val = (int)uint_val;
+		double_val = (double)uint_val;
+		break;
+
+	case PLY_FLOAT:
+	case PLY_FLOAT_32:
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64:
+		double_val = atof( word.c_str() );
+		int_val = (int)double_val;
+		uint_val = (unsigned int)double_val;
+		break;
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+}
+
+/******************************************************************************
+Store a value into a place being pointed to, guided by a data type.
+
+Entry:
+item       - place to store value
+type       - data type
+int_val    - integer version of value
+uint_val   - unsigned integer version of value
+double_val - double version of value
+
+Exit:
+item - pointer to stored value
+******************************************************************************/
+
+void store_item( void *item , int type , int int_val , unsigned int uint_val , double double_val )
+{
+	switch( type )
+	{
+	case PLY_CHAR:
+	case PLY_INT_8:    *static_cast< char * >( item ) = static_cast< char >( int_val ); break;
+	case PLY_UCHAR:
+	case PLY_UINT_8:   *static_cast< unsigned char * >( item ) = static_cast< unsigned char >( uint_val ); break;
+	case PLY_SHORT:
+	case PLY_INT_16:   *static_cast< short * >( item ) = static_cast< short >( int_val ); break;
+	case PLY_USHORT:
+	case PLY_UINT_16:  *static_cast< unsigned short * >( item ) = static_cast< unsigned short >( uint_val ); break;
+	case PLY_INT:
+	case PLY_INT_32:   *static_cast< int * >( item ) = int_val; break;
+	case PLY_UINT:
+	case PLY_UINT_32:  *static_cast< unsigned int * >( item ) = uint_val; break;
+	case PLY_FLOAT:
+	case PLY_FLOAT_32: *static_cast< float * >( item ) = static_cast< float >( double_val ); break;
+	case PLY_DOUBLE:
+	case PLY_FLOAT_64: *static_cast< double * >( item ) = double_val; break;
+	default: ERROR_OUT( "Bad type = %d" , type );
+	}
+}
+
+
+/******************************************************************************
+Add an element to this PLY file descriptor.
+
+Entry:
+words - words of the element's header line: words[1] is the element name and
+        words[2] its count; fewer than three words is reported with ERROR_OUT
+        instead of indexing past the end of the list
+******************************************************************************/
+
+void PlyFile::add_element( const std::vector< std::string > &words )
+{
+	/* guard the words[1] / words[2] accesses below against a truncated header line */
+	if( words.size()<3 ){ ERROR_OUT( "Malformed element line" ); return; }
+	PlyElement elem;
+	elem.name = words[1];
+	elem.num = atoi( words[2].c_str() );
+	elem.props.resize(0);
+
+	/* add the new element to the object's list */
+	elems.push_back( elem );
+}
+
+/******************************************************************************
+Return the type of a property, given the name of the property.
+
+Entry:
+name - name of property type
+
+Exit:
+returns integer code for property, or 0 if not found
+******************************************************************************/
+
+int get_prop_type( const std::string &type_name )
+{
+	/* walk the type codes strictly between the START and END markers until the name matches */
+	int code = PLY_START_TYPE+1;
+	while( code<PLY_END_TYPE && type_name!=type_names[code] ) code++;
+	return code<PLY_END_TYPE ? code : 0;   /* 0: no type has this name */
+}
+
+/******************************************************************************
+Add a property to the most recently added element of this PLY file descriptor.
+
+Entry:
+words - words of the property's header line, either
+        "property <type> <name>" or "property list <count type> <item type> <name>";
+        a line with too few words, or one seen before any element, is reported with ERROR_OUT
+******************************************************************************/
+
+void PlyFile::add_property( const std::vector< std::string > &words )
+{
+	/* validate before indexing: words[1..4] for a list, words[1..2] otherwise, and elems.back() needs an element */
+	const bool is_list = words.size()>1 && words[1]=="list";
+	if( elems.empty() || words.size()<( is_list ? 5u : 3u ) ){ ERROR_OUT( "Malformed property line" ); return; }
+	PlyProperty prop;
+	prop.is_list = is_list ? 1 : 0;
+	if( is_list )
+	{
+		prop.count_external = get_prop_type( words[2] );
+		prop.external_type = get_prop_type( words[3] );
+		prop.name = words[4];
+	}
+	else
+	{
+		prop.external_type = get_prop_type( words[1] );
+		prop.name = words[2];
+	}
+	elems.back().props.push_back( PlyStoredProperty( prop , DONT_STORE_PROP ) );
+}
+
+
+/******************************************************************************
+Add a comment to this PLY file descriptor.
+
+Entry:
+line - header line holding the comment; the text is taken to start after the
+       7-character "comment" keyword, and a shorter line yields an empty comment
+******************************************************************************/
+
+void PlyFile::add_comment( const std::string &line )
+{
+	/* skip over "comment" and the spaces and tabs after it; the index is bounds-checked so a */
+	/* line shorter than the keyword gives an empty comment instead of an out-of-range access */
+	size_t i = 7;
+	while( i<line.size() && ( line[i]==' ' || line[i]=='\t' ) ) i++;
+	put_comment( i<line.size() ? line.substr(i) : std::string() );
+}
+
+
+/******************************************************************************
+Add some object information to this PLY file descriptor.
+
+Entry:
+line - header line holding the text info; the text is taken to start after the
+       8-character "obj_info" keyword, and a shorter line yields an empty string
+******************************************************************************/
+
+void PlyFile::add_obj_info( const std::string &line )
+{
+	/* skip over "obj_info" and the spaces and tabs after it, never indexing past the end of the line */
+	size_t i = 8;
+	while( i<line.size() && ( line[i]==' ' || line[i]=='\t' ) ) i++;
+	put_obj_info( i<line.size() ? line.substr(i) : std::string() );
+}
diff --git a/Src/PlyFile.h b/Src/PlyFile.h
new file mode 100644
index 0000000..84b8a69
--- /dev/null
+++ b/Src/PlyFile.h
@@ -0,0 +1,198 @@
+/*
+
+Header for PLY polygon files.
+
+- Greg Turk, March 1994
+
+A PLY file contains a single polygonal _object_.
+
+An object is composed of lists of _elements_.  Typical elements are
+vertices, faces, edges and materials.
+
+Each type of element for a given object has one or more _properties_
+associated with the element type.  For instance, a vertex element may
+have as properties three floating-point values x,y,z and three unsigned
+chars for red, green and blue.
+
+---------------------------------------------------------------
+
+Copyright (c) 1994 The Board of Trustees of The Leland Stanford
+Junior University.  All rights reserved.   
+
+Permission to use, copy, modify and distribute this software and its   
+documentation for any purpose is hereby granted without fee, provided   
+that the above copyright notice and this permission notice appear in   
+all copies of this software and that you do not sell the software.   
+
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
+*/
+
+#ifndef __PLY_FILE_H__
+#define __PLY_FILE_H__
+
+#define MISHA_PLY
+
+#include <string>
+#include <vector>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+
+#define PLY_ASCII         1      /* ascii PLY file */
+#define PLY_BINARY_BE     2      /* binary PLY file, big endian */
+#define PLY_BINARY_LE     3      /* binary PLY file, little endian */
+#define PLY_BINARY_NATIVE 4      /* binary PLY file, same endianness as current architecture */
+
+#define PLY_OKAY    0           /* ply routine worked okay */
+#define PLY_ERROR  -1           /* error in ply routine */
+
+	/* scalar data types supported by PLY format */
+
+#define PLY_START_TYPE 0
+#define PLY_CHAR       1
+#define PLY_SHORT      2
+#define PLY_INT        3
+#define PLY_UCHAR      4
+#define PLY_USHORT     5
+#define PLY_UINT       6
+#define PLY_FLOAT      7
+#define PLY_DOUBLE     8
+#define PLY_INT_8      9
+#define PLY_UINT_8     10
+#define PLY_INT_16     11
+#define PLY_UINT_16    12
+#define PLY_INT_32     13
+#define PLY_UINT_32    14
+#define PLY_FLOAT_32   15
+#define PLY_FLOAT_64   16
+
+#define PLY_END_TYPE   17
+
+#define  PLY_SCALAR  0
+#define  PLY_LIST    1
+
+#define PLY_STRIP_COMMENT_HEADER 0
+
+/* one property of an element: its name plus how it is typed in the file and laid out in memory */
+struct PlyProperty
+{
+	std::string name;                     /* name of the property */
+	int external_type;                    /* data type used in the file */
+	int internal_type;                    /* data type used by the program */
+	int offset;                           /* byte offset of the property within a struct */
+
+	int is_list;                          /* 1 for a list property, 0 for a scalar */
+	int count_external;                   /* list-count type used in the file */
+	int count_internal;                   /* list-count type used by the program */
+	int count_offset;                     /* byte offset of the list count */
+
+	PlyProperty( const std::string &propName , int extType , int intType , int off , int list=0 , int cntExt=0 , int cntInt=0 , int cntOff=0 ) : name(propName) , external_type(extType) , internal_type(intType) , offset(off) , is_list(list) , count_external(cntExt) , count_internal(cntInt) , count_offset(cntOff){ }
+	PlyProperty( const std::string &propName ) : PlyProperty( propName , 0 , 0 , 0 ){ }   /* named property, every numeric field zero */
+	PlyProperty( void ) : PlyProperty( std::string() ){ }                                 /* unnamed property, every numeric field zero */
+};
+
+struct PlyStoredProperty   /* a property description paired with a caller-supplied store flag */
+{
+	PlyProperty prop ; char store;
+	PlyStoredProperty( void ) : store(0){ }   /* zero the flag: it was previously left indeterminate */
+	PlyStoredProperty( const PlyProperty &p , char s ) : prop(p) , store(s){ }   /* copies p and records the flag s */
+};
+
+/* description of an element; NOTE(review): no constructor, so the int fields are indeterminate unless value-initialized */
+struct PlyElement
+{
+	std::string name;             /* element name */
+	int num;                      /* number of elements in this object */
+	int size;                     /* size of element (bytes) or -1 if variable */
+	std::vector< PlyStoredProperty > props; /* properties in the file, each paired with its store flag */
+	int other_offset;             /* offset to un-asked-for props, or -1 if none */
+	int other_size;               /* size of other_props structure */
+	PlyProperty *find_property( const std::string &prop_name , int &index ); /* defined elsewhere; presumably a lookup by name that reports the position through index -- confirm */
+};
+
+/* describes the "other" (un-asked-for) properties of an element */
+struct PlyOtherProp
+{
+	std::string name;                   /* name of the element these properties belong to */
+	int size;                           /* size of other_props (presumably in bytes -- confirm); not initialized by this struct */
+	std::vector< PlyProperty > props;   /* descriptions of the properties held in other_props */
+};
+
+/* owner of one element's other_props buffer. NOTE(review): copies share the pointer, so copying a non-NULL instance ends in a double free */
+struct OtherData
+{
+	void *other_props;                              /* buffer handed to free() on destruction, or NULL */
+	OtherData( void ) : other_props(NULL){ }        /* starts out owning nothing */
+	~OtherData( void ){ free( other_props ); }      /* free(NULL) is a no-op, so no guard is needed */
+};
+
+/* data for one "other" element: its name, its raw property data and the layout of that data */
+struct OtherElem
+{
+	std::string elem_name;                /* name of this other element */
+	std::vector< OtherData > other_data;  /* actual property data (presumably one entry per stored element -- confirm) */
+	PlyOtherProp other_props;             /* description of the property data */
+};
+
+/* collection of the "other" elements, which are not interpreted by the user */
+struct PlyOtherElems
+{
+	std::vector< OtherElem > other_list; /* one OtherElem record per other element */
+};
+
+/* description of a PLY file; the object closes fp and deletes other_elems when destroyed (NOTE(review): copying is not disabled) */
+struct PlyFile
+{
+	FILE *fp;                            /* file pointer */
+	int file_type;                       /* ascii or binary */
+	float version;                       /* version number of file */
+	std::vector< PlyElement > elems;     /* list of elements of object */
+	std::vector< std::string > comments; /* list of comments */
+	std::vector< std::string > obj_info; /* list of object info items */
+	PlyElement *which_elem;              /* which element we're currently writing */
+	PlyOtherElems *other_elems;         /* "other" elements from a PLY file */
+
+	static PlyFile *Write( const std::string & , const std::vector< std::string > & , int   , float & );
+	static PlyFile *Read ( const std::string & ,       std::vector< std::string > & , int & , float & );
+
+	PlyFile( FILE *f ) : fp(f) , file_type(0) , version(1.f) , which_elem(NULL) , other_elems(NULL) { }   /* initializers follow declaration order; file_type 0 matches none of the PLY_* format codes, i.e. "not set yet" */
+	~PlyFile( void ){ if( fp ) fclose(fp) ; delete other_elems; }   /* fclose needs the guard (NULL is not allowed); delete accepts NULL */
+
+	void describe_element ( const std::string & , int , int , const PlyProperty * );
+	void describe_property( const std::string & , const PlyProperty * );
+	void describe_other_elements( PlyOtherElems * );
+	PlyElement *find_element( const std::string & );
+	void element_count( const std::string & , int );
+	void header_complete( void );
+	void put_element_setup( const std::string & );
+	void put_element ( void * );
+	void put_comment ( const std::string & );
+	void put_obj_info( const std::string & );
+	void put_other_elements( void );
+	void add_element ( const std::vector< std::string > & );
+	void add_property( const std::vector< std::string > & );
+	void add_comment ( const std::string & );
+	void add_obj_info( const std::string & );
+
+	std::vector< PlyProperty * > get_element_description( const std::string & , int & );
+	void get_element_setup( const std::string & , int , PlyProperty * );
+	int get_property( const std::string & , const PlyProperty * );
+	void describe_other_properties( const PlyOtherProp & , int );
+	bool set_other_properties( const std::string & , int , PlyOtherProp & );
+	void get_element( void * );
+	std::vector< std::string > &get_comments( void );
+	std::vector< std::string > &get_obj_info( void );
+	void get_info( float & , int & );
+	PlyOtherElems *get_other_element( std::string & , int );
+protected:
+	void _ascii_get_element ( void * );
+	void _binary_get_element( void * );
+	static PlyFile *_Write( FILE * , const std::vector< std::string > & , int );
+	static PlyFile *_Read ( FILE * ,       std::vector< std::string > & );
+};
+
+#endif /* !__PLY_FILE_H__ */
diff --git a/Src/PointStream.h b/Src/PointStream.h
index fc4cb4c..9abf464 100644
--- a/Src/PointStream.h
+++ b/Src/PointStream.h
@@ -28,33 +28,35 @@ DAMAGE.
 
 #ifndef POINT_STREAM_INCLUDED
 #define POINT_STREAM_INCLUDED
+
+#include <functional>
 #include "Ply.h"
 #include "Geometry.h"
 
 
-template< class Real >
-class OrientedPointStream
+template< class Real , int Dim >
+class InputPointStream
 {
 public:
-	virtual ~OrientedPointStream( void ){}
+	virtual ~InputPointStream( void ){}
 	virtual void reset( void ) = 0;
-	virtual bool nextPoint( OrientedPoint3D< Real >& p ) = 0;
-	virtual int nextPoints( OrientedPoint3D< Real >* p , int count )
+	virtual bool nextPoint( Point< Real , Dim >& p ) = 0;
+	virtual int nextPoints( Point< Real , Dim >* p , int count )
 	{
 		int c=0;
 		for( int i=0 ; i<count ; i++ , c++ ) if( !nextPoint( p[i] ) ) break;
 		return c;
 	}
-	void boundingBox( Point3D< Real >& min , Point3D< Real >& max )
+	void boundingBox( Point< Real , Dim >& min , Point< Real , Dim >& max )
 	{
 		bool first = true;
-		OrientedPoint3D< Real > p;
+		Point< Real , Dim > p;
 		while( nextPoint( p ) )
 		{
-			for( int i=0 ; i<3 ; i++ )
+			for( int i=0 ; i<Dim ; i++ )
 			{
-				if( first || p.p[i]<min[i] ) min[i] = p.p[i];
-				if( first || p.p[i]>max[i] ) max[i] = p.p[i];
+				if( first || p[i]<min[i] ) min[i] = p[i];
+				if( first || p[i]>max[i] ) max[i] = p[i];
 			}
 			first = false;
 		}
@@ -62,168 +64,246 @@ class OrientedPointStream
 	}
 };
 
-template< class Real , class Data >
-class OrientedPointStreamWithData : public OrientedPointStream< Real >
+template< class Real , int Dim >
+class OutputPointStream
 {
 public:
-	virtual ~OrientedPointStreamWithData( void ){}
+	virtual ~OutputPointStream( void ){}
+	virtual void nextPoint( const Point< Real , Dim >& p ) = 0;
+	virtual void nextPoints( const Point< Real , Dim >* p , int count ){ for( int i=0 ; i<count ; i++ ) nextPoint( p[i] ); }
+};
+
+template< class Real , int Dim , class Data >
+class InputPointStreamWithData : public InputPointStream< Real , Dim >
+{
+public:
+	virtual ~InputPointStreamWithData( void ){}
 	virtual void reset( void ) = 0;
-	virtual bool nextPoint( OrientedPoint3D< Real >& p , Data& d ) = 0;
+	virtual bool nextPoint( Point< Real , Dim >& p , Data& d ) = 0;
 
-	virtual bool nextPoint( OrientedPoint3D< Real >& p ){ Data d ; return nextPoint( p , d ); }
-	virtual int nextPoints( OrientedPoint3D< Real >* p , Data* d , int count )
+	virtual bool nextPoint( Point< Real , Dim >& p ){ Data d ; return nextPoint( p , d ); }
+	virtual int nextPoints( Point< Real , Dim >* p , Data* d , int count )
 	{
 		int c=0;
 		for( int i=0 ; i<count ; i++ , c++ ) if( !nextPoint( p[i] , d[i] ) ) break;
 		return c;
 	}
-	virtual int nextPoints( OrientedPoint3D< Real >* p , int count ){ return OrientedPointStream< Real >::nextPoints( p , count ); }
+	virtual int nextPoints( Point< Real , Dim >* p , int count ){ return InputPointStream< Real , Dim >::nextPoints( p , count ); }
 };
 
-template< class Real >
-class TransformedOrientedPointStream : public OrientedPointStream< Real >
+template< class Real , int Dim , class Data >
+class OutputPointStreamWithData : public OutputPointStream< Real , Dim >	// point sink whose points each carry a Data payload
 {
-	XForm4x4< Real > _xForm;
-	XForm3x3< Real > _normalXForm;
-	OrientedPointStream< Real >& _stream;
 public:
-	TransformedOrientedPointStream( XForm4x4< Real > xForm , OrientedPointStream< Real >& stream ) : _xForm(xForm) , _stream(stream)
-	{
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) _normalXForm(i,j) = _xForm(i,j);
-		_normalXForm = _normalXForm.transpose().inverse();
-	};
+	virtual ~OutputPointStreamWithData( void ){}
+	virtual void nextPoint( const Point< Real , Dim >& p , const Data& d ) = 0;	// consume one point together with its payload
+	// The overloads below are implemented on top of the pure virtual nextPoint( p , d )
+	virtual void nextPoint( const Point< Real , Dim >& p ){ Data d = Data() ; nextPoint( p , d ); }	// value-initialized payload, so no indeterminate data is forwarded
+	virtual void nextPoints( const Point< Real , Dim >* p , const Data* d , int count ){ for( int i=0 ; i<count ; i++ ) nextPoint( p[i] , d[i] ); }	// p and d are parallel arrays of length count
+	virtual void nextPoints( const Point< Real , Dim >* p , int count ){ OutputPointStream< Real , Dim >::nextPoints( p , count ); }	// re-exposes the base-class batch overload
+};
+
+template< class Real , int Dim >
+class TransformedInputPointStream : public InputPointStream< Real , Dim >	// reads from _stream and applies _xForm in place to every point read
+{
+	std::function< void ( Point< Real , Dim >& ) > _xForm;
+	InputPointStream< Real , Dim >& _stream;	// held by reference: the wrapped stream must outlive this object
+public:
+	TransformedInputPointStream( std::function< void ( Point< Real , Dim >& ) > xForm , InputPointStream< Real , Dim >& stream ) : _xForm(xForm) , _stream(stream) {;}
 	virtual void reset( void ){ _stream.reset(); }
-	virtual bool nextPoint( OrientedPoint3D< Real >& p )
+	virtual bool nextPoint( Point< Real , Dim >& p )	// returns what the wrapped stream returned
 	{
 		bool ret = _stream.nextPoint( p );
-		p.p = _xForm * p.p , p.n = _normalXForm * p.n;
+		if( ret ) _xForm( p );	// only transform a point that was actually read
 		return ret;
 	}
 };
 
-template< class Real , class Data >
-class TransformedOrientedPointStreamWithData : public OrientedPointStreamWithData< Real , Data >
+template< class Real , int Dim >
+class TransformedOutputPointStream : public OutputPointStream< Real , Dim >	// applies _xForm to a copy of each point, then forwards the copy to _stream
 {
-	XForm4x4< Real > _xForm;
-	XForm3x3< Real > _normalXForm;
-	OrientedPointStreamWithData< Real , Data >& _stream;
+	std::function< void ( Point< Real , Dim >& ) > _xForm;
+	OutputPointStream< Real , Dim >& _stream;	// held by reference: the wrapped stream must outlive this object
 public:
-	TransformedOrientedPointStreamWithData( XForm4x4< Real > xForm , OrientedPointStreamWithData< Real , Data >& stream ) : _xForm(xForm) , _stream(stream)
+	TransformedOutputPointStream( std::function< void ( Point< Real , Dim >& ) > xForm , OutputPointStream< Real , Dim >& stream ) : _xForm(xForm) , _stream(stream) {;}
+	virtual void reset( void ){ }	// kept for interface compatibility; OutputPointStream declares no reset() to forward to
+	virtual void nextPoint( const Point< Real , Dim >& p )	// returns void so that it overrides OutputPointStream::nextPoint
 	{
-		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) _normalXForm(i,j) = _xForm(i,j);
-		_normalXForm = _normalXForm.transpose().inverse();
-	};
+		Point< Real , Dim > _p = p;
+		_xForm( _p );
+		_stream.nextPoint( _p );
+	}
+};
+
+template< class Real , int Dim , class Data >
+class TransformedInputPointStreamWithData : public InputPointStreamWithData< Real , Dim , Data >	// reads from _stream and applies _xForm in place to every (point , data) pair read
+{
+	std::function< void ( Point< Real , Dim >& , Data& ) > _xForm;
+	InputPointStreamWithData< Real , Dim , Data >& _stream;	// held by reference: the wrapped stream must outlive this object
+public:
+	TransformedInputPointStreamWithData( std::function< void ( Point< Real , Dim >& , Data& ) > xForm , InputPointStreamWithData< Real , Dim , Data >& stream ) : _xForm(xForm) , _stream(stream) {;}
 	virtual void reset( void ){ _stream.reset(); }
-	virtual bool nextPoint( OrientedPoint3D< Real >& p , Data& d )
+	virtual bool nextPoint( Point< Real , Dim >& p , Data& d )	// returns what the wrapped stream returned
 	{
 		bool ret = _stream.nextPoint( p , d );
-		p.p = _xForm * p.p , p.n = _normalXForm * p.n;
+		if( ret ) _xForm( p , d );	// only transform a pair that was actually read
 		return ret;
 	}
 };
 
-template< class Real >
-class MemoryOrientedPointStream : public OrientedPointStream< Real >
+template< class Real , int Dim , class Data >
+class TransformedOutputPointStreamWithData : public OutputPointStreamWithData< Real , Dim , Data >	// transforms copies of (point , data) before forwarding them
+{
+	std::function< void ( Point< Real , Dim >& , Data& ) > _xForm;	// in-place transform applied to each copy
+	OutputPointStreamWithData< Real , Dim , Data >& _stream;	// destination stream, held by reference
+public:
+	TransformedOutputPointStreamWithData( std::function< void ( Point< Real , Dim >& , Data& ) > xForm , OutputPointStreamWithData< Real , Dim , Data >& stream ) : _xForm(xForm) , _stream(stream) {;}
+	virtual void nextPoint( const Point< Real , Dim >& p , const Data& d )
+	{
+		// the inputs are const, so the transform works on private copies
+		Point< Real , Dim > movedPoint = p;
+		Data movedData = d;
+		_xForm( movedPoint , movedData ) , _stream.nextPoint( movedPoint , movedData );
+	}
+};
+
+template< class Real , int Dim >
+class MemoryInputPointStream : public InputPointStream< Real , Dim >
 {
-	const OrientedPoint3D< Real >* _points;
+	const Point< Real , Dim >* _points;
 	size_t _pointCount;
 	size_t _current;
 public:
-	MemoryOrientedPointStream( size_t pointCount , const OrientedPoint3D< Real >* points );
-	~MemoryOrientedPointStream( void );
+	MemoryInputPointStream( size_t pointCount , const Point< Real , Dim >* points );
+	~MemoryInputPointStream( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p );
+	bool nextPoint( Point< Real , Dim >& p );
 };
 
-template< class Real , class Data >
-class MemoryOrientedPointStreamWithData : public OrientedPointStreamWithData< Real , Data >
+template< class Real , int Dim , class Data >
+class MemoryInputPointStreamWithData : public InputPointStreamWithData< Real , Dim , Data >
 {
-	const std::pair< OrientedPoint3D< Real > , Data >* _points;
+	const std::pair< Point< Real , Dim > , Data >* _points;
 	size_t _pointCount;
 	size_t _current;
 public:
-	MemoryOrientedPointStreamWithData( size_t pointCount , const std::pair< OrientedPoint3D< Real > , Data >* points );
-	~MemoryOrientedPointStreamWithData( void );
+	MemoryInputPointStreamWithData( size_t pointCount , const std::pair< Point< Real , Dim > , Data >* points );
+	~MemoryInputPointStreamWithData( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p , Data& d );
+	bool nextPoint( Point< Real , Dim >& p , Data& d );
 };
 
-template< class Real >
-class ASCIIOrientedPointStream : public OrientedPointStream< Real >
+template< class Real , int Dim >
+class ASCIIInputPointStream : public InputPointStream< Real , Dim >
 {
 	FILE* _fp;
 public:
-	ASCIIOrientedPointStream( const char* fileName );
-	~ASCIIOrientedPointStream( void );
+	ASCIIInputPointStream( const char* fileName );
+	~ASCIIInputPointStream( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p );
+	bool nextPoint( Point< Real , Dim >& p );
 };
 
-template< class Real , class Data >
-class ASCIIOrientedPointStreamWithData : public OrientedPointStreamWithData< Real , Data >
+template< class Real , int Dim >
+class ASCIIOutputPointStream : public OutputPointStream< Real , Dim >
 {
 	FILE* _fp;
-	Data (*_readData)( FILE* );
 public:
-	ASCIIOrientedPointStreamWithData( const char* fileName , Data (*readData)( FILE* ) );
-	~ASCIIOrientedPointStreamWithData( void );
-	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p , Data& d );
+	ASCIIOutputPointStream( const char* fileName );
+	~ASCIIOutputPointStream( void );
+	void nextPoint( const Point< Real , Dim >& p );
 };
 
-template< class Real , class RealOnDisk=Real >
-class BinaryOrientedPointStream : public OrientedPointStream< Real >
+template< class Real , int Dim , class Data >
+class ASCIIInputPointStreamWithData : public InputPointStreamWithData< Real , Dim , Data >
 {
 	FILE* _fp;
-	static const int POINT_BUFFER_SIZE=1024;
-	OrientedPoint3D< RealOnDisk > _pointBuffer[ POINT_BUFFER_SIZE ];
-	int _pointsInBuffer , _currentPointIndex;
+	void (*_ReadData)( FILE* , Data& );
 public:
-	BinaryOrientedPointStream( const char* filename );
-	~BinaryOrientedPointStream( void );
+	ASCIIInputPointStreamWithData( const char* fileName , void (*ReadData)( FILE* , Data& ) );
+	~ASCIIInputPointStreamWithData( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p );
+	bool nextPoint( Point< Real , Dim >& p , Data& d );
 };
 
-template< class Real , class Data , class RealOnDisk=Real , class DataOnDisk=Data >
-class BinaryOrientedPointStreamWithData : public OrientedPointStreamWithData< Real , Data >
+template< class Real , int Dim , class Data >
+class ASCIIOutputPointStreamWithData : public OutputPointStreamWithData< Real , Dim , Data >
 {
 	FILE* _fp;
-	static const int POINT_BUFFER_SIZE=1024;
-	std::pair< OrientedPoint3D< RealOnDisk > , DataOnDisk > _pointBuffer[ POINT_BUFFER_SIZE ];
-	int _pointsInBuffer , _currentPointIndex;
+	void (*_WriteData)( FILE* , const Data& );
 public:
-	BinaryOrientedPointStreamWithData( const char* filename );
-	~BinaryOrientedPointStreamWithData( void );
-	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p , Data& d );
+	ASCIIOutputPointStreamWithData( const char* fileName , void (*WriteData)( FILE* , const Data& ) );
+	~ASCIIOutputPointStreamWithData( void );
+	void nextPoint( const Point< Real , Dim >& p , const Data& d );
 };
 
-template< class Real >
-class PLYOrientedPointStream : public OrientedPointStream< Real >
+template< class Real , int Dim >
+class BinaryInputPointStream : public InputPointStream< Real , Dim >	// point source backed by a FILE handle
+{
+	FILE* _fp;
+public:
+	BinaryInputPointStream( const char* filename );
+	~BinaryInputPointStream( void ){ fclose( _fp ) , _fp=NULL; }
+	void reset( void ){ fseek( _fp , 0 , SEEK_SET ); }	// rewind to the start of the file: fseek takes ( stream , offset , whence )
+	bool nextPoint( Point< Real , Dim >& p );
+};
+template< class Real , int Dim >
+class BinaryOutputPointStream : public OutputPointStream< Real , Dim >	// point sink backed by a FILE handle
+{
+	FILE* _fp;
+public:
+	BinaryOutputPointStream( const char* filename );
+	~BinaryOutputPointStream( void ){ fclose( _fp ) , _fp=NULL; }
+	void reset( void ){ fseek( _fp , 0 , SEEK_SET ); }	// reposition at the start of the file: fseek takes ( stream , offset , whence )
+	void nextPoint( const Point< Real , Dim >& p );
+};
+
+template< class Real , int Dim , class Data >
+class BinaryInputPointStreamWithData : public InputPointStreamWithData< Real , Dim , Data >	// point-with-data source backed by a FILE handle
+{
+	FILE* _fp;
+	void (*_ReadData)( FILE* , Data& );	// caller-supplied callback taking the file and the Data object to fill
+public:
+	BinaryInputPointStreamWithData( const char* filename , void (*ReadData)( FILE* , Data& ) );
+	~BinaryInputPointStreamWithData( void ){ fclose( _fp ) , _fp=NULL; }
+	void reset( void ){ fseek( _fp , 0 , SEEK_SET ); }	// rewind to the start of the file: fseek takes ( stream , offset , whence )
+	bool nextPoint( Point< Real , Dim >& p , Data& d );
+};
+template< class Real , int Dim , class Data >
+class BinaryOutputPointStreamWithData : public OutputPointStreamWithData< Real , Dim , Data >	// point-with-data sink backed by a FILE handle
+{
+	FILE* _fp;
+	void (*_WriteData)( FILE* , const Data& );	// caller-supplied callback taking the file and the Data object to emit
+public:
+	BinaryOutputPointStreamWithData( const char* filename , void (*WriteData)( FILE* , const Data& ) );
+	~BinaryOutputPointStreamWithData( void ){ fclose( _fp ) , _fp=NULL; }
+	void reset( void ){ fseek( _fp , 0 , SEEK_SET ); }	// reposition at the start of the file: fseek takes ( stream , offset , whence )
+	void nextPoint( const Point< Real , Dim >& p , const Data& d );
+};
+
+template< class Real , int Dim >
+class PLYInputPointStream : public InputPointStream< Real , Dim >
 {
 	char* _fileName;
 	PlyFile* _ply;
-	int _nr_elems;
-	char **_elist;
+	std::vector< std::string > _elist;
 
 	int _pCount , _pIdx;
 	void _free( void );
 public:
-	PLYOrientedPointStream( const char* fileName );
-	~PLYOrientedPointStream( void );
+	PLYInputPointStream( const char* fileName );
+	~PLYInputPointStream( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p );
+	bool nextPoint( Point< Real , Dim >& p );
 };
 
-template< class Real , class Data >
-class PLYOrientedPointStreamWithData : public OrientedPointStreamWithData< Real , Data >
+template< class Real , int Dim , class Data >
+class PLYInputPointStreamWithData : public InputPointStreamWithData< Real , Dim , Data >
 {
-	struct _PlyOrientedVertexWithData : public PlyOrientedVertex< Real > { Data data; };
+	struct _PlyVertexWithData : public PlyVertex< Real , Dim > { Data data; };
 	char* _fileName;
 	PlyFile* _ply;
-	int _nr_elems;
-	char **_elist;
+	std::vector< std::string > _elist;
 	PlyProperty* _dataProperties;
 	int _dataPropertiesCount;
 	bool (*_validationFunction)( const bool* );
@@ -231,10 +311,33 @@ class PLYOrientedPointStreamWithData : public OrientedPointStreamWithData< Real
 	int _pCount , _pIdx;
 	void _free( void );
 public:
-	PLYOrientedPointStreamWithData( const char* fileName , const PlyProperty* dataProperties , int dataPropertiesCount , bool (*validationFunction)( const bool* )=NULL );
-	~PLYOrientedPointStreamWithData( void );
+	PLYInputPointStreamWithData( const char* fileName , const PlyProperty* dataProperties , int dataPropertiesCount , bool (*validationFunction)( const bool* )=NULL );
+	~PLYInputPointStreamWithData( void );
 	void reset( void );
-	bool nextPoint( OrientedPoint3D< Real >& p , Data& d );
+	bool nextPoint( Point< Real , Dim >& p , Data& d );
+};
+
+template< class Real , int Dim >
+class PLYOutputPointStream : public OutputPointStream< Real , Dim >
+{
+	PlyFile* _ply;
+	int _pCount , _pIdx;
+public:
+	PLYOutputPointStream( const char* fileName , size_t count , int fileType );
+	~PLYOutputPointStream( void );
+	void nextPoint( const Point< Real , Dim >& p );
+};
+
+template< class Real , int Dim , class Data >
+class PLYOutputPointStreamWithData : public OutputPointStreamWithData< Real , Dim , Data >
+{
+	struct _PlyVertexWithData : public PlyVertex< Real , Dim > { Data data; };
+	PlyFile* _ply;
+	int _pCount , _pIdx;
+public:
+	PLYOutputPointStreamWithData( const char* fileName , size_t count , int fileType , const PlyProperty* dataProperties , int dataPropertiesCount );
+	~PLYOutputPointStreamWithData( void );
+	void nextPoint( const Point< Real , Dim >& p , const Data& d );
 };
 
 #include "PointStream.inl"
diff --git a/Src/PointStream.inl b/Src/PointStream.inl
index 5d17002..139672f 100644
--- a/Src/PointStream.inl
+++ b/Src/PointStream.inl
@@ -26,18 +26,17 @@ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF S
 DAMAGE.
 */
 
-
-///////////////////////////////
-// MemoryOrientedPointStream //
-///////////////////////////////
-template< class Real >
-MemoryOrientedPointStream< Real >::MemoryOrientedPointStream( size_t pointCount , const OrientedPoint3D< Real >* points ){ _points = points , _pointCount = pointCount , _current = 0; }
-template< class Real >
-MemoryOrientedPointStream< Real >::~MemoryOrientedPointStream( void ){ ; }
-template< class Real >
-void MemoryOrientedPointStream< Real >::reset( void ) { _current=0; }
-template< class Real >
-bool MemoryOrientedPointStream< Real >::nextPoint( OrientedPoint3D< Real >& p )
+////////////////////////////
+// MemoryInputPointStream //
+////////////////////////////
+template< class Real , int Dim >
+MemoryInputPointStream< Real , Dim >::MemoryInputPointStream( size_t pointCount , const Point< Real , Dim >* points ){ _points = points , _pointCount = pointCount , _current = 0; }
+template< class Real , int Dim >
+MemoryInputPointStream< Real , Dim >::~MemoryInputPointStream( void ){ ; }
+template< class Real , int Dim >
+void MemoryInputPointStream< Real , Dim >::reset( void ) { _current=0; }
+template< class Real , int Dim >
+bool MemoryInputPointStream< Real , Dim >::nextPoint( Point< Real , Dim >& p )
 {
 	if( _current>=_pointCount ) return false;
 	p = _points[_current];
@@ -45,177 +44,154 @@ bool MemoryOrientedPointStream< Real >::nextPoint( OrientedPoint3D< Real >& p )
 	return true;
 }
 
-//////////////////////////////
-// ASCIIOrientedPointStream //
-//////////////////////////////
-template< class Real >
-ASCIIOrientedPointStream< Real >::ASCIIOrientedPointStream( const char* fileName )
+///////////////////////////
+// ASCIIInputPointStream //
+///////////////////////////
+template< class Real , int Dim >
+ASCIIInputPointStream< Real , Dim >::ASCIIInputPointStream( const char* fileName )
 {
 	_fp = fopen( fileName , "r" );
-	if( !_fp ) fprintf( stderr , "Failed to open file for reading: %s\n" , fileName ) , exit( 0 );
+	if( !_fp ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
 }
-template< class Real >
-ASCIIOrientedPointStream< Real >::~ASCIIOrientedPointStream( void )
+template< class Real , int Dim >
+ASCIIInputPointStream< Real , Dim >::~ASCIIInputPointStream( void )
 {
 	fclose( _fp );
 	_fp = NULL;
 }
-template< class Real >
-void ASCIIOrientedPointStream< Real >::reset( void ) { fseek( _fp , SEEK_SET , 0 ); }
-template< class Real >
-bool ASCIIOrientedPointStream< Real >::nextPoint( OrientedPoint3D< Real >& p )
+template< class Real , int Dim >
+void ASCIIInputPointStream< Real , Dim >::reset( void ) { fseek( _fp , 0 , SEEK_SET ); }	// rewind to the start of the file: fseek takes ( stream , offset , whence )
+template< class Real , int Dim >
+bool ASCIIInputPointStream< Real , Dim >::nextPoint( Point< Real , Dim >& p )
 {
-	float c[2*3];
-	if( fscanf( _fp , " %f %f %f %f %f %f " , &c[0] , &c[1] , &c[2] , &c[3] , &c[4] , &c[5] )!=2*3 ) return false;
-	p.p[0] = c[0] , p.p[1] = c[1] , p.p[2] = c[2];
-	p.n[0] = c[3] , p.n[1] = c[4] , p.n[2] = c[5];
+	float c;
+	for( int d=0 ; d<Dim ; d++ )
+		if( fscanf( _fp , " %f " , &c )!=1 ) return false;
+		else p[d] = (Real)c;
 	return true;
 }
 
-///////////////////////////////
-// BinaryOrientedPointStream //
-///////////////////////////////
-template< class Real , class RealOnDisk >
-BinaryOrientedPointStream< Real , RealOnDisk >::BinaryOrientedPointStream( const char* fileName )
+////////////////////////////
+// ASCIIOutputPointStream //
+////////////////////////////
+template< class Real , int Dim >
+ASCIIOutputPointStream< Real , Dim >::ASCIIOutputPointStream( const char* fileName )
 {
-	_pointsInBuffer = _currentPointIndex = 0;
-	_fp = fopen( fileName , "rb" );
-	if( !_fp ) fprintf( stderr , "Failed to open file for reading: %s\n" , fileName ) , exit( 0 );
+	_fp = fopen( fileName , "w" );
+	if( !_fp ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
 }
-template< class Real , class RealOnDisk >
-BinaryOrientedPointStream< Real , RealOnDisk >::~BinaryOrientedPointStream( void )
+template< class Real , int Dim >
+ASCIIOutputPointStream< Real , Dim >::~ASCIIOutputPointStream( void )
 {
 	fclose( _fp );
 	_fp = NULL;
 }
-template< class Real , class RealOnDisk >
-void BinaryOrientedPointStream< Real , RealOnDisk >::reset( void )
+template< class Real , int Dim >
+void ASCIIOutputPointStream< Real , Dim >::nextPoint( const Point< Real , Dim >& p )
 {
-	fseek( _fp , SEEK_SET , 0 );
-	_pointsInBuffer = _currentPointIndex = 0;
-}
-template< class Real , class RealOnDisk >
-bool BinaryOrientedPointStream< Real , RealOnDisk >::nextPoint( OrientedPoint3D< Real >& p )
-{
-	if( _currentPointIndex<_pointsInBuffer )
-	{
-		p = OrientedPoint3D< Real >( _pointBuffer[ _currentPointIndex ] );
-		_currentPointIndex++;
-		return true;
-	}
-	else
-	{
-		_currentPointIndex = 0;
-		_pointsInBuffer = int( fread( _pointBuffer , sizeof( OrientedPoint3D< RealOnDisk > ) , POINT_BUFFER_SIZE , _fp ) );
-		if( !_pointsInBuffer ) return false;
-		else return nextPoint( p );
-	}
+	for( int d=0 ; d<Dim ; d++ ) fprintf( _fp , " %f" , (float)p[d] ); 
+	fprintf( _fp , "\n" );
 }
 
 ////////////////////////////
-// PLYOrientedPointStream //
+// BinaryInputPointStream //
 ////////////////////////////
-template< class Real >
-PLYOrientedPointStream< Real >::PLYOrientedPointStream( const char* fileName )
+template< class Real , int Dim >
+BinaryInputPointStream< Real , Dim >::BinaryInputPointStream( const char* fileName )
+{
+	_fp = fopen( fileName , "rb" );
+	if( !_fp ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
+}
+template< class Real , int Dim >
+bool BinaryInputPointStream< Real , Dim >::nextPoint( Point< Real , Dim >& p ){ return fread( &p , sizeof(Point< Real , Dim >) , 1 , _fp )==1; }
+
+/////////////////////////////
+// BinaryOutputPointStream //
+/////////////////////////////
+template< class Real , int Dim >
+BinaryOutputPointStream< Real , Dim >::BinaryOutputPointStream( const char* fileName )
+{
+	_fp = fopen( fileName , "wb" );
+	if( !_fp ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
+}
+template< class Real , int Dim >
+void BinaryOutputPointStream< Real , Dim >::nextPoint( const Point< Real , Dim >& p ){ if( fwrite( &p , sizeof(Point< Real , Dim >) , 1 , _fp )!=1 ) ERROR_OUT( "Failed to write point" ); }	// writes the raw bytes of p; a short write is reported instead of ignored
+
+/////////////////////////
+// PLYInputPointStream //
+/////////////////////////
+template< class Real , int Dim >
+PLYInputPointStream< Real , Dim >::PLYInputPointStream( const char* fileName )
 {
 	_fileName = new char[ strlen( fileName )+1 ];
 	strcpy( _fileName , fileName );
 	_ply = NULL;
 	reset();
 }
-template< class Real >
-void PLYOrientedPointStream< Real >::reset( void )
+template< class Real , int Dim >
+void PLYInputPointStream< Real , Dim >::reset( void )	// (re)opens _fileName and selects the "vertex" element's properties for reading
 {
 	int fileType;
 	float version;
-	PlyProperty** plist;
+	std::vector< PlyProperty * > plist;
 	if( _ply ) _free();
-	_ply = ply_open_for_reading( _fileName, &_nr_elems, &_elist, &fileType, &version );
-	if( !_ply )
-	{
-		fprintf( stderr, "[ERROR] Failed to open ply file for reading: %s\n" , _fileName );
-		exit( 0 );
-	}
+	_ply = PlyFile::Read( _fileName, _elist, fileType, version );
+	if( !_ply ) ERROR_OUT( "Failed to open ply file for reading: %s" , _fileName );
+	// Scan the element list, stopping at the first element named "vertex"
 	bool foundVertices = false;
-	for( int i=0 ; i<_nr_elems ; i++ )
+	for( size_t i=0 ; i<_elist.size() ; i++ )
 	{
 		int num_elems;
-		int nr_props;
-		char* elem_name = _elist[i];
-		plist = ply_get_element_description( _ply , elem_name , &num_elems , &nr_props );
-		if( !plist )
-		{
-			fprintf( stderr , "[ERROR] Failed to get element description: %s\n" , elem_name );
-			exit( 0 );
-		}	
+		std::string &elem_name = _elist[i];
+		plist = _ply->get_element_description( elem_name , num_elems );
+		if( !plist.size() ) ERROR_OUT( "Failed to get element description: %s" , elem_name.c_str() );	// %s needs a C string, not a std::string object
 
-		if( equal_strings( "vertex" , elem_name ) )
+		if( elem_name=="vertex" )
 		{
 			foundVertices = true;
 			_pCount = num_elems , _pIdx = 0;
-			for( int i=0 ; i<PlyOrientedVertex< Real >::ReadComponents ; i++ ) 
-				if( !ply_get_property( _ply , elem_name , &(PlyOrientedVertex< Real >::ReadProperties[i]) ) )
-				{
-					fprintf( stderr , "[ERROR] Failed to find property in ply file: %s\n" , PlyOrientedVertex< Real >::ReadProperties[i].name );
-					exit( 0 );
-				}
-		}
-		for( int j=0 ; j<nr_props ; j++ )
-		{
-			free( plist[j]->name );
-			free( plist[j] );
+			for( int c=0 ; c<PlyVertex< Real , Dim >::ReadComponents ; c++ )	// own index name: no longer shadows the element index i
+				if( !_ply->get_property( elem_name , &(PlyVertex< Real , Dim >::Properties()[c]) ) ) ERROR_OUT( "Failed to find property in ply file: %s" , PlyVertex< Real , Dim >::Properties()[c].name.c_str() );
 		}
-		free( plist );
+		for( size_t j=0 ; j<plist.size() ; j++ ) delete plist[j];	// the descriptions returned above are released here with delete
 		if( foundVertices ) break;
 	}
-	if( !foundVertices )
-	{
-		fprintf( stderr , "[ERROR] Could not find vertices in ply file\n" );
-		exit( 0 );
-	}
+	if( !foundVertices ) ERROR_OUT( "Could not find vertices in ply file" );
 }
-template< class Real >
-void PLYOrientedPointStream< Real >::_free( void )
-{
-	if( _ply ) ply_close( _ply ) , _ply = NULL;
-	if( _elist )
-	{
-		for( int i=0 ; i<_nr_elems ; i++ ) free( _elist[i] );
-		free( _elist );
-	}
-}
-template< class Real >
-PLYOrientedPointStream< Real >::~PLYOrientedPointStream( void )
+template< class Real , int Dim > // Releases the PlyFile object and clears the pointer so it is never left dangling.
+void PLYInputPointStream< Real , Dim >::_free( void ){ delete _ply , _ply = NULL; }
+
+template< class Real , int Dim >
+PLYInputPointStream< Real , Dim >::~PLYInputPointStream( void ) // Releases the PlyFile (through _free) and the copied file name.
 {
 	_free();
 	if( _fileName ) delete[] _fileName , _fileName = NULL;
 }
-template< class Real >
-bool PLYOrientedPointStream< Real >::nextPoint( OrientedPoint3D< Real >& p )
+template< class Real , int Dim >
+bool PLYInputPointStream< Real , Dim >::nextPoint( Point< Real , Dim >& p ) // Yields the next vertex position; false once _pCount vertices have been returned.
 {
 	if( _pIdx<_pCount )
 	{
-		PlyOrientedVertex< Real > op;
-		ply_get_element( _ply, (void *)&op );
-		p.p = op.point;
-		p.n = op.normal;
+		PlyVertex< Real , Dim > v;
+		_ply->get_element( (void *)&v ); // v receives the element handed back by PlyFile::get_element
+		p = v.point;
 		_pIdx++;
 		return true;
 	}
 	else return false;
 }
 
-///////////////////////////////////////
-// MemoryOrientedPointStreamWithData //
-///////////////////////////////////////
-template< class Real , class Data >
-MemoryOrientedPointStreamWithData< Real , Data >::MemoryOrientedPointStreamWithData( size_t pointCount , const std::pair< OrientedPoint3D< Real > , Data >* points ){ _points = points , _pointCount = pointCount , _current = 0; }
-template< class Real , class Data >
-MemoryOrientedPointStreamWithData< Real , Data >::~MemoryOrientedPointStreamWithData( void ){ ; }
-template< class Real , class Data >
-void MemoryOrientedPointStreamWithData< Real , Data >::reset( void ) { _current=0; }
-template< class Real , class Data >
-bool MemoryOrientedPointStreamWithData< Real , Data >::nextPoint( OrientedPoint3D< Real >& p , Data& d )
+////////////////////////////////////
+// MemoryInputPointStreamWithData //
+////////////////////////////////////
+template< class Real , int Dim , class Data > // Stores the pointer to an array of pointCount (point,data) pairs and starts at the first one; the array is not copied.
+MemoryInputPointStreamWithData< Real , Dim , Data >::MemoryInputPointStreamWithData( size_t pointCount , const std::pair< Point< Real , Dim > , Data >* points ){ _points = points ; _pointCount = pointCount ; _current = 0; }
+template< class Real , int Dim , class Data > // Nothing to release: the stream only holds a pointer to the points.
+MemoryInputPointStreamWithData< Real , Dim , Data >::~MemoryInputPointStreamWithData( void ){}
+template< class Real , int Dim , class Data > // Rewinds the stream to the first point.
+void MemoryInputPointStreamWithData< Real , Dim , Data >::reset( void ){ _current = 0; }
+template< class Real , int Dim , class Data >
+bool MemoryInputPointStreamWithData< Real , Dim , Data >::nextPoint( Point< Real , Dim >& p , Data& d )
 {
 	if( _current>=_pointCount ) return false;
 	p = _points[_current].first;
@@ -224,185 +200,252 @@ bool MemoryOrientedPointStreamWithData< Real , Data >::nextPoint( OrientedPoint3
 	return true;
 }
 
-//////////////////////////////////////
-// ASCIIOrientedPointStreamWithData //
-//////////////////////////////////////
-template< class Real , class Data >
-ASCIIOrientedPointStreamWithData< Real , Data >::ASCIIOrientedPointStreamWithData( const char* fileName , Data (*readData)( FILE* ) ) : _readData( readData )
+///////////////////////////////////
+// ASCIIInputPointStreamWithData //
+///////////////////////////////////
+template< class Real , int Dim , class Data >
+ASCIIInputPointStreamWithData< Real , Dim , Data >::ASCIIInputPointStreamWithData( const char* fileName , void (*ReadData)( FILE* , Data& ) ) : _ReadData( ReadData ) // ReadData is stored; nextPoint invokes it after each point's coordinates
 {
 	_fp = fopen( fileName , "r" );
-	if( !_fp ) fprintf( stderr , "Failed to open file for reading: %s\n" , fileName ) , exit( 0 );
+	if( !_fp ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
 }
-template< class Real , class Data >
-ASCIIOrientedPointStreamWithData< Real , Data >::~ASCIIOrientedPointStreamWithData( void )
+template< class Real , int Dim , class Data >
+ASCIIInputPointStreamWithData< Real , Dim , Data >::~ASCIIInputPointStreamWithData( void ) // Closes the input file.
 {
 	fclose( _fp );
 	_fp = NULL;
 }
-template< class Real , class Data >
-void ASCIIOrientedPointStreamWithData< Real , Data >::reset( void ) { fseek( _fp , SEEK_SET , 0 ); }
-template< class Real , class Data >
-bool ASCIIOrientedPointStreamWithData< Real , Data >::nextPoint( OrientedPoint3D< Real >& p , Data& d )
+template< class Real , int Dim , class Data > // Rewinds to the start of the file; fseek takes (stream, offset, whence), so the offset 0 comes before SEEK_SET.
+void ASCIIInputPointStreamWithData< Real , Dim , Data >::reset( void ) { fseek( _fp , 0 , SEEK_SET ); }
+template< class Real , int Dim , class Data >
+bool ASCIIInputPointStreamWithData< Real , Dim , Data >::nextPoint( Point< Real , Dim >& p , Data& d )
 {
-	float c[2*3];
-	if( fscanf( _fp , " %f %f %f %f %f %f " , &c[0] , &c[1] , &c[2] , &c[3] , &c[4] , &c[5] )!=2*3 ) return false;
-	p.p[0] = c[0] , p.p[1] = c[1] , p.p[2] = c[2];
-	p.n[0] = c[3] , p.n[1] = c[4] , p.n[2] = c[5];
-	d = _readData( _fp );
+	// Parse Dim floats into p, giving up (false) on the first one that cannot be read; the payload is read afterwards.
+	float coord;
+	for( int axis=0 ; axis<Dim ; axis++ )
+		if( fscanf( _fp , " %f " , &coord )==1 ) p[axis] = coord ; else return false;
+	_ReadData( _fp , d );
 	return true;
 }
 
-///////////////////////////////////////
-// BinaryOrientedPointStreamWithData //
-///////////////////////////////////////
-template< class Real , class Data , class RealOnDisk , class DataOnDisk >
-BinaryOrientedPointStreamWithData< Real , Data , RealOnDisk , DataOnDisk >::BinaryOrientedPointStreamWithData( const char* fileName )
+////////////////////////////////////
+// ASCIIOutputPointStreamWithData //
+////////////////////////////////////
+template< class Real , int Dim , class Data >
+ASCIIOutputPointStreamWithData< Real , Dim , Data >::ASCIIOutputPointStreamWithData( const char* fileName , void (*WriteData)( FILE* , const Data& ) ) : _WriteData( WriteData )
 {
-	_pointsInBuffer = _currentPointIndex = 0;
-	_fp = fopen( fileName , "rb" );
-	if( !_fp ) fprintf( stderr , "Failed to open file for reading: %s\n" , fileName ) , exit( 0 );
+	// Open the destination with mode "w"; a file that cannot be opened is reported through ERROR_OUT.
+	if( !( _fp = fopen( fileName , "w" ) ) ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
 }
-template< class Real , class Data , class RealOnDisk , class DataOnDisk >
-BinaryOrientedPointStreamWithData< Real , Data , RealOnDisk , DataOnDisk >::~BinaryOrientedPointStreamWithData( void )
+template< class Real , int Dim , class Data >
+ASCIIOutputPointStreamWithData< Real , Dim , Data >::~ASCIIOutputPointStreamWithData( void ) // Closes the output file.
 {
 	fclose( _fp );
 	_fp = NULL;
 }
-template< class Real , class Data , class RealOnDisk , class DataOnDisk >
-void BinaryOrientedPointStreamWithData< Real , Data , RealOnDisk , DataOnDisk >::reset( void )
+template< class Real , int Dim , class Data >
+void ASCIIOutputPointStreamWithData< Real , Dim , Data >::nextPoint( const Point< Real , Dim >& p , const Data& d )
 {
-	fseek( _fp , SEEK_SET , 0 );
-	_pointsInBuffer = _currentPointIndex = 0;
+	for( int axis=0 ; axis<Dim ; axis++ ) fprintf( _fp , " %f" , (float)p[axis] ); // one text line per point: the coordinates first ...
+	fputs( " " , _fp );
+	_WriteData( _fp , d ); // ... then the payload, formatted by the user-supplied callback
+	fputs( "\n" , _fp );
 }
-template< class Real , class Data , class RealOnDisk , class DataOnDisk >
-bool BinaryOrientedPointStreamWithData< Real , Data , RealOnDisk , DataOnDisk >::nextPoint( OrientedPoint3D< Real >& p , Data& d )
+
+////////////////////////////////////
+// BinaryInputPointStreamWithData //
+////////////////////////////////////
+template< class Real , int Dim , class Data >
+BinaryInputPointStreamWithData< Real , Dim , Data >::BinaryInputPointStreamWithData( const char* fileName , void (*ReadData)( FILE* , Data& ) ) : _ReadData(ReadData)
 {
-	if( _currentPointIndex<_pointsInBuffer )
+	// Open the source with mode "rb"; a file that cannot be opened is reported through ERROR_OUT.
+	if( !( _fp = fopen( fileName , "rb" ) ) ) ERROR_OUT( "Failed to open file for reading: %s" , fileName );
+}
+template< class Real , int Dim , class Data >
+bool BinaryInputPointStreamWithData< Real , Dim , Data >::nextPoint( Point< Real , Dim >& p , Data& d ) // Reads one point and its payload; false when a whole point cannot be read.
+{
+	if( fread( &p , sizeof(Point< Real , Dim >) , 1 , _fp )==1 ) // the point is read as sizeof(Point) raw bytes
 	{
-		p = OrientedPoint3D< Real >( _pointBuffer[ _currentPointIndex ].first );
-		d = Data( _pointBuffer[ _currentPointIndex ].second );
-		_currentPointIndex++;
+		_ReadData( _fp , d ); // the payload is decoded by the user-supplied callback
 		return true;
 	}
-	else
-	{
-		_currentPointIndex = 0;
-		_pointsInBuffer = int( fread( _pointBuffer , sizeof( std::pair< OrientedPoint3D< RealOnDisk > , DataOnDisk > ) , POINT_BUFFER_SIZE , _fp ) );
-		if( !_pointsInBuffer ) return false;
-		else return nextPoint( p , d );
-	}
+	else return false;
 }
 
-////////////////////////////////////
-// PLYOrientedPointStreamWithData //
-////////////////////////////////////
-template< class Real , class Data >
-PLYOrientedPointStreamWithData< Real , Data >::PLYOrientedPointStreamWithData( const char* fileName , const PlyProperty* dataProperties , int dataPropertiesCount , bool (*validationFunction)( const bool* ) ) : _dataPropertiesCount( dataPropertiesCount ) , _validationFunction( validationFunction )
+/////////////////////////////////////
+// BinaryOutputPointStreamWithData //
+/////////////////////////////////////
+template< class Real , int Dim , class Data >
+BinaryOutputPointStreamWithData< Real , Dim , Data >::BinaryOutputPointStreamWithData( const char* fileName , void (*WriteData)( FILE* , const Data& ) ) : _WriteData(WriteData)
+{
+	// Open the destination with mode "wb"; a file that cannot be opened is reported through ERROR_OUT.
+	if( !( _fp = fopen( fileName , "wb" ) ) ) ERROR_OUT( "Failed to open file for writing: %s" , fileName );
+}
+template< class Real , int Dim , class Data >
+void BinaryOutputPointStreamWithData< Real , Dim , Data >::nextPoint( const Point< Real , Dim >& p , const Data& d ) // Writes the point as raw bytes, then its payload.
+{
+	if( fwrite( &p , sizeof(Point< Real , Dim >) , 1 , _fp )!=1 ) ERROR_OUT( "Failed to write point" ); // a short write would otherwise corrupt the file silently
+	_WriteData( _fp , d ); // the payload is encoded by the user-supplied callback
+}
+
+/////////////////////////////////
+// PLYInputPointStreamWithData //
+/////////////////////////////////
+template< class Real , int Dim , class Data >
+PLYInputPointStreamWithData< Real , Dim , Data >::PLYInputPointStreamWithData( const char* fileName , const PlyProperty* dataProperties , int dataPropertiesCount , bool (*validationFunction)( const bool* ) ) : _dataPropertiesCount( dataPropertiesCount ) , _validationFunction( validationFunction )
 {
 	_dataProperties = new PlyProperty[ _dataPropertiesCount ];
-	memcpy( _dataProperties , dataProperties , sizeof(PlyProperty) * _dataPropertiesCount );
-	for( int i=0 ; i<_dataPropertiesCount ; i++ ) _dataProperties[i].offset += sizeof( PlyOrientedVertex< Real > );
+	// Copy each data property and shift its offset by sizeof(PlyVertex) -- presumably because the data follows the vertex in the record filled by nextPoint.
+	for( int i=0 ; i<_dataPropertiesCount ; i++ ) _dataProperties[i] = dataProperties[i] , _dataProperties[i].offset += sizeof( PlyVertex< Real , Dim > );
 	_fileName = new char[ strlen( fileName )+1 ];
 	strcpy( _fileName , fileName );
 	_ply = NULL;
 	reset();
 }
-template< class Real , class Data >
-void PLYOrientedPointStreamWithData< Real , Data >::reset( void )
+template< class Real , int Dim , class Data >
+void PLYInputPointStreamWithData< Real , Dim , Data >::reset( void ) // (Re)opens _fileName, releasing any previously opened PlyFile, and resets the point counter.
 {
 	int fileType;
 	float version;
-	PlyProperty** plist;
+	std::vector< PlyProperty * > plist;
 	if( _ply ) _free();
-	_ply = ply_open_for_reading( _fileName, &_nr_elems, &_elist, &fileType, &version );
-	if( !_ply )
-	{
-		fprintf( stderr, "[ERROR] Failed to open ply file for reading: %s\n" , _fileName );
-		exit( 0 );
-	}
+	_ply = PlyFile::Read( _fileName , _elist , fileType , version );
+	if( !_ply ) ERROR_OUT( "Failed to open ply file for reading: %s" , _fileName );
+	// Walk the element list until the "vertex" element is found, requesting the properties to read from it.
 	bool foundVertices = false;
-	for( int i=0 ; i<_nr_elems ; i++ )
+	for( int i=0 ; i<_elist.size() ; i++ )
 	{
 		int num_elems;
-		int nr_props;
-		char* elem_name = _elist[i];
-		plist = ply_get_element_description( _ply , elem_name , &num_elems , &nr_props );
-		if( !plist )
-		{
-			fprintf( stderr , "[ERROR] Failed to get element description: %s\n" , elem_name );
-			exit( 0 );
-		}	
+		std::string &elem_name = _elist[i];
+		plist = _ply->get_element_description( elem_name , num_elems );
+		if( !plist.size() ) ERROR_OUT( "Failed to get element description: %s" , elem_name.c_str() );
 
-		if( equal_strings( "vertex" , elem_name ) )
+		if( elem_name=="vertex" )
 		{
 			foundVertices = true;
 			_pCount = num_elems , _pIdx = 0;
-			for( int i=0 ; i<PlyOrientedVertex< Real >::ReadComponents ; i++ ) 
-				if( !ply_get_property( _ply , elem_name , &(PlyOrientedVertex< Real >::ReadProperties[i]) ) )
-				{
-					fprintf( stderr , "[ERROR] Failed to find property in ply file: %s\n" , PlyOrientedVertex< Real >::ReadProperties[i].name );
-					exit( 0 );
-				}
+			const PlyProperty* PlyReadProperties = PlyVertex< Real , Dim >::PlyReadProperties();
+			for( int i=0 ; i<PlyVertex< Real , Dim >::PlyReadNum ; i++ ) 
+				if( !_ply->get_property( elem_name , &(PlyReadProperties[i]) ) ) ERROR_OUT( "Failed to find property in ply file: %s" , PlyReadProperties[i].name.c_str() );
+			// Data properties: with a validation function, record which ones the file provides and let it decide; otherwise a missing one only warns.
 			if( _validationFunction )
 			{
 				bool* properties = new bool[_dataPropertiesCount];
 				for( int i=0 ; i<_dataPropertiesCount ; i++ )
-					if( !ply_get_property( _ply , elem_name , &(_dataProperties[i]) ) ) properties[i] = false;
-					else                                                                properties[i] = true;
+					if( !_ply->get_property( elem_name , &(_dataProperties[i]) ) ) properties[i] = false;
+					else                                                           properties[i] = true;
 				bool valid = _validationFunction( properties );
 				delete[] properties;
-				if( !valid ) fprintf( stderr , "[ERROR] Failed to validate properties in file\n" ) , exit( 0 );
+				if( !valid ) ERROR_OUT( "Failed to validate properties in file" );
 			}
 			else
 			{
 				for( int i=0 ; i<_dataPropertiesCount ; i++ )
-					if( !ply_get_property( _ply , elem_name , &(_dataProperties[i]) ) )
-						fprintf( stderr , "[WARNING] Failed to find property in ply file: %s\n" , _dataProperties[i].name );
+					if( !_ply->get_property( elem_name , &(_dataProperties[i]) ) ) WARN( "Failed to find property in ply file: %s" , _dataProperties[i].name.c_str() );
 			}
 		}
-		for( int j=0 ; j<nr_props ; j++ )
-		{
-			free( plist[j]->name );
-			free( plist[j] );
-		}
-		free( plist );
+		for( int j=0 ; j<plist.size() ; j++ ) delete plist[j]; // the returned property descriptions are released here
 		if( foundVertices ) break;
 	}
-	if( !foundVertices )
-	{
-		fprintf( stderr , "[ERROR] Could not find vertices in ply file\n" );
-		exit( 0 );
-	}
-}
-template< class Real , class Data >
-void PLYOrientedPointStreamWithData< Real , Data >::_free( void )
-{
-	if( _ply ) ply_close( _ply ) , _ply = NULL;
-	if( _elist )
-	{
-		for( int i=0 ; i<_nr_elems ; i++ ) free( _elist[i] );
-		free( _elist );
-	}
+	if( !foundVertices ) ERROR_OUT( "Could not find vertices in ply file" );
 }
-template< class Real , class Data >
-PLYOrientedPointStreamWithData< Real , Data >::~PLYOrientedPointStreamWithData( void )
+template< class Real , int Dim , class Data > // Releases the PlyFile object and clears the pointer so it is never left dangling.
+void PLYInputPointStreamWithData< Real , Dim , Data >::_free( void ){ delete _ply , _ply = NULL; }
+
+template< class Real , int Dim , class Data >
+PLYInputPointStreamWithData< Real , Dim , Data >::~PLYInputPointStreamWithData( void ) // Releases the PlyFile (through _free), the copied file name and the copied data properties.
 {
 	_free();
 	if( _fileName ) delete[] _fileName , _fileName = NULL;
 	if( _dataProperties ) delete[] _dataProperties , _dataProperties = NULL;
 }
-template< class Real , class Data >
-bool PLYOrientedPointStreamWithData< Real , Data >::nextPoint( OrientedPoint3D< Real >& p , Data& d )
+template< class Real , int Dim , class Data >
+bool PLYInputPointStreamWithData< Real , Dim , Data >::nextPoint( Point< Real , Dim >& p , Data& d ) // Yields the next vertex position and payload; false once _pCount vertices have been returned.
 {
 	if( _pIdx<_pCount )
 	{
-		_PlyOrientedVertexWithData op;
-		ply_get_element( _ply, (void *)&op );
-		p.p = op.point;
-		p.n = op.normal;
-		d = op.data;
+		_PlyVertexWithData v;
+		_ply->get_element( (void*) &v ); // v receives the element handed back by PlyFile::get_element
+		p = v.point;
+		d = v.data;
 		_pIdx++;
 		return true;
 	}
 	else return false;
 }
+
+//////////////////////////
+// PLYOutputPointStream //
+//////////////////////////
+template< class Real , int Dim >
+PLYOutputPointStream< Real , Dim >::PLYOutputPointStream( const char* fileName , size_t count , int fileType )
+{
+	// The file holds a single "vertex" element; `version` is only handed to PlyFile::Write and not used afterwards.
+	float version;
+	std::vector< std::string > elem_names( 1 , std::string( "vertex" ) );
+	_ply = PlyFile::Write( fileName , elem_names , fileType , version );
+	if( !_ply ) ERROR_OUT( "Failed to open ply file for writing: %s" , fileName );
+	_pIdx = 0 , _pCount = count;
+	_ply->element_count( "vertex" , _pCount );
+	for( int c=0 ; c<PlyVertex< Real , Dim >::WriteComponents ; c++ ) _ply->describe_property( "vertex" , &PlyVertex< Real , Dim >::WriteProperties()[c] );
+	// With every property described, finish the header and get ready to receive the vertices.
+	_ply->header_complete();
+	_ply->put_element_setup( "vertex" );
+}
+template< class Real , int Dim >
+PLYOutputPointStream< Real , Dim >::~PLYOutputPointStream( void ) // Checks that exactly _pCount points were streamed, then releases the PlyFile.
+{
+	if( _pIdx!=_pCount ) ERROR_OUT( "Streamed points not equal to total count: %d!=%d" , _pIdx , _pCount ); // NOTE(review): %d assumes _pIdx/_pCount are int -- confirm against the class declaration
+	delete _ply;
+}
+template< class Real , int Dim >
+void PLYOutputPointStream< Real , Dim >::nextPoint( const Point< Real , Dim >& p )
+{
+	// Refuse to write more points than the count given to the constructor.
+	if( _pIdx==_pCount ) ERROR_OUT( "Trying to add more points than total: %d<%d" , _pIdx , _pCount );
+	PlyVertex< Real , Dim > vertex ; vertex.point = p;
+	_ply->put_element( (void *)&vertex );
+	_pIdx++;
+}
+
+//////////////////////////////////
+// PLYOutputPointStreamWithData //
+//////////////////////////////////
+template< class Real , int Dim , class Data >
+PLYOutputPointStreamWithData< Real , Dim , Data >::PLYOutputPointStreamWithData( const char* fileName , size_t count , int fileType , const PlyProperty* dataProperties , int dataPropertiesCount )
+{
+	// The file holds a single "vertex" element; `version` is only handed to PlyFile::Write and not used afterwards.
+	float version;
+	std::vector< std::string > elem_names( 1 , std::string( "vertex" ) );
+	_ply = PlyFile::Write( fileName , elem_names , fileType , version );
+	if( !_ply ) ERROR_OUT( "Failed to open ply file for writing: %s" , fileName );
+	_pIdx = 0 , _pCount = (int)count;
+	_ply->element_count( "vertex" , _pCount );
+	// Describe the vertex properties first ...
+	for( int c=0 ; c<PlyVertex< Real , Dim >::WriteComponents ; c++ ) _ply->describe_property( "vertex" , &PlyVertex< Real , Dim >::Properties()[c] );
+	// ... then each data property, with its offset shifted by sizeof(PlyVertex).
+	for( int j=0 ; j<dataPropertiesCount ; j++ )
+	{
+		PlyProperty shifted = dataProperties[j];
+		shifted.offset += sizeof( PlyVertex< Real , Dim > );
+		_ply->describe_property( "vertex" , &shifted );
+	}
+	_ply->header_complete();
+	_ply->put_element_setup( "vertex" );
+}
+template< class Real , int Dim , class Data >
+PLYOutputPointStreamWithData< Real , Dim , Data >::~PLYOutputPointStreamWithData( void ) // Checks that exactly _pCount points were streamed, then releases the PlyFile.
+{
+	if( _pIdx!=_pCount ) ERROR_OUT( "Streamed points not equal to total count: %d!=%d" , _pIdx , _pCount ); // NOTE(review): %d assumes _pIdx/_pCount are int -- confirm against the class declaration
+	delete _ply;
+}
+template< class Real , int Dim , class Data >
+void PLYOutputPointStreamWithData< Real , Dim , Data >::nextPoint( const Point< Real , Dim >& p , const Data& d )
+{
+	// Refuse to write more points than the count given to the constructor.
+	if( _pIdx==_pCount ) ERROR_OUT( "Trying to add more points than total: %d<%d" , _pIdx , _pCount );
+	_PlyVertexWithData record;
+	record.point = p , record.data = d;
+	_ply->put_element( (void *)&record );
+	_pIdx++;
+}
diff --git a/Src/PointStreamData.h b/Src/PointStreamData.h
new file mode 100644
index 0000000..d215417
--- /dev/null
+++ b/Src/PointStreamData.h
@@ -0,0 +1,500 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+#ifndef POINT_STREAM_DATA_INCLUDED
+#define POINT_STREAM_DATA_INCLUDED
+
+#include <algorithm>
+#include <tuple>
+#include "Ply.h"
+
+template< class Real > using Color = Point< Real , 3 >; // a color is a three-component Point of Reals
+template< class Real > void SetColorValues( const Color< Real >& color , unsigned char c[3] ){ for( int i=0 ; i<3 ; i++ ){ const int rounded = (int)( color[i]+0.5 ) ; c[i] = (unsigned char)( rounded<0 ? 0 : ( rounded>255 ? 255 : rounded ) ); } } // each channel: add 0.5, truncate to int, clamp to [0,255]
+template< class Real > void SetColorValues( const Color< Real >& color , RGBColor& c ){ for( int i=0 ; i<3 ; i++ ){ const int rounded = (int)( color[i]+0.5 ) ; c[i] = (unsigned char)( rounded<0 ? 0 : ( rounded>255 ? 255 : rounded ) ); } } // each channel: add 0.5, truncate to int, clamp to [0,255]
+
+// Should have:
+// -- binary operators for vectors
+// -- static ReadASCII
+// -- static WriteASCII
+// -- static ReadBinary
+// -- static WriteBinary
+// -- static ValidPlyReadProperties( const bool* ) method
+// -- static int PlyReadNum
+// -- static int PlyWriteNum
+// -- static const PlyProperty* PlyReadProperties()
+// -- static const PlyProperty* PlyWriteProperties()
+// -- a nested class Transform, constructible from a transformation (e.g. an XForm), whose operator() maps the data
+template< typename Real , typename Data >
+struct PointStreamData
+{
+	Data data;
+	// Vector-space arithmetic, forwarded to the corresponding operators of Data
+	PointStreamData& operator += ( const PointStreamData& p ){ data += p.data ; return *this; }
+	PointStreamData& operator -= ( const PointStreamData& p ){ data -= p.data ; return *this; }
+	PointStreamData& operator *= ( Real s ){ data *= s ; return *this; }
+	PointStreamData& operator /= ( Real s ){ data /= s ; return *this; }
+	PointStreamData operator + ( const PointStreamData& p ) const { PointStreamData sum( *this ) ; sum += p ; return sum; }
+	PointStreamData operator - ( const PointStreamData& p ) const { PointStreamData difference( *this ) ; difference -= p ; return difference; }
+	PointStreamData operator * ( Real s ) const { PointStreamData scaled( *this ) ; scaled *= s ; return scaled; }
+	PointStreamData operator / ( Real s ) const { PointStreamData scaled( *this ) ; scaled /= s ; return scaled; }
+	// PLY description: only declared here, no definitions are given in this struct
+	static const int PlyReadNum;
+	static const int PlyWriteNum;
+	static const PlyProperty* PlyReadProperties( void );
+	static const PlyProperty* PlyWriteProperties( void );
+	static bool ValidPlyReadProperties( const bool* flags );
+};
+template< class Real , unsigned int Dim >
+struct PointStreamPosition : public PointStreamData< Real , Point< Real , Dim > >
+{
+	// Functor applying a stored XForm< Real , Dim+1 > to a position through XForm's operator*
+	struct Transform
+	{
+		Transform( void ){}
+		Transform( const XForm< Real , Dim+1 >& xForm ) : _xForm(xForm) { }
+		PointStreamPosition operator() ( const PointStreamPosition& p ) const
+		{ PointStreamPosition q ; q.data = _xForm * p.data ; return q; }
+	protected:
+		XForm< Real , Dim+1 > _xForm;
+	};
+	// Reads Dim whitespace-separated floats from a text stream into p.data; a failed parse is reported through ERROR_OUT
+	static void ReadASCII( FILE* fp , PointStreamPosition& p )
+	{
+		float f;
+		for( int i=0 ; i<Dim ; i++ )
+			if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read position" );
+			else p.data[i] = (Real)f;
+	}
+	// Original lower-case spelling, kept so that existing callers continue to compile
+	static void readASCII( FILE* fp , PointStreamPosition& p ){ ReadASCII( fp , p ); }
+	// Reads Dim binary floats into p.data; a short read is reported through ERROR_OUT
+	static void ReadBinary( FILE* fp , PointStreamPosition& p )
+	{
+		float f;
+		for( int i=0 ; i<Dim ; i++ )
+			if( fread( &f , sizeof(float) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read position" );
+			else p.data[i] = (Real)f;
+	}
+	static void WriteASCII( FILE* fp , const PointStreamPosition& p ){ for( int i=0 ; i<Dim ; i++ ) fprintf( fp , " %f" , (float)p.data[i] ); }
+	static void WriteBinary( FILE* fp , const PointStreamPosition& p )
+	{
+		for( int i=0 ; i<Dim ; i++ )
+		{
+			float f = (float)p.data[i];
+			fwrite( &f , sizeof(float) , 1 , fp );
+		}
+	}
+	static const int PlyReadNum = Dim;
+	static const int PlyWriteNum = Dim;
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+	static bool ValidPlyReadProperties( const bool* flags ){ for( int d=0 ; d<Dim ; d++ ) if( !flags[d] ) return false ; return true ; }
+protected:
+	static const PlyProperty _PlyProperties[];
+};
+template<> // PLY property table for 2D float positions: "x","y" mapped to data.coords[0..1]
+const PlyProperty PointStreamPosition< float , 2 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 2D double positions; NOTE(review): PLY_FLOAT / PLY_DOUBLE are presumably the file-side and memory-side types -- confirm against PlyProperty
+const PlyProperty PointStreamPosition< double , 2 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 3D float positions: "x","y","z" mapped to data.coords[0..2]
+const PlyProperty PointStreamPosition< float , 3 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 3D double positions
+const PlyProperty PointStreamPosition< double , 3 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 4D float positions: "x","y","z","w" mapped to data.coords[0..3]
+const PlyProperty PointStreamPosition< float , 4 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "w" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamPosition , data.coords[3] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 4D double positions
+const PlyProperty PointStreamPosition< double , 4 >::_PlyProperties[] =
+{
+	PlyProperty( "x" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "y" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "z" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "w" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamPosition , data.coords[3] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+
+template< class Real , unsigned int Dim >
+struct PointStreamNormal : public PointStreamData< Real , Point< Real , Dim > >
+{
+	// Functor mapping normals: built from the entries (i,j), i,j<Dim, of xForm, which are replaced by
+	// the inverse of their transpose and then divided by |determinant|^(1/Dim)
+	struct Transform
+	{
+		Transform( void ){}
+		Transform( const XForm< Real , Dim+1 >& xForm )
+		{
+			for( int ii=0 ; ii<Dim ; ii++ ) for( int jj=0 ; jj<Dim ; jj++ ) _xForm(ii,jj) = xForm(ii,jj);
+			_xForm = _xForm.transpose().inverse();
+			_xForm /= (Real)pow( fabs( _xForm.determinant() ) , 1./Dim );
+		}
+		PointStreamNormal operator() ( const PointStreamNormal& n ) const { PointStreamNormal mapped ; mapped.data = _xForm * n.data ; return mapped; }
+	protected:
+		XForm< Real , Dim > _xForm;
+	};
+	// Text input: Dim whitespace-separated floats, converted to Real; a failed parse is reported through ERROR_OUT
+	static void ReadASCII( FILE* fp , PointStreamNormal& p )
+	{
+		for( int i=0 ; i<Dim ; i++ )
+		{
+			float f;
+			if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read normal" );
+			else p.data[i] = (Real)f;
+		}
+	}
+	// Binary input: Dim floats, converted to Real; a short read is reported through ERROR_OUT
+	static void ReadBinary( FILE* fp , PointStreamNormal& p )
+	{
+		for( int i=0 ; i<Dim ; i++ )
+		{
+			float f;
+			if( fread( &f , sizeof(float) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read normal" );
+			else p.data[i] = (Real)f;
+		}
+	}
+	// Output mirrors the input: every component is written as a float
+	static void WriteASCII( FILE* fp , const PointStreamNormal& p ){ for( int i=0 ; i<Dim ; i++ ) fprintf( fp , " %f" , (float)p.data[i] ); }
+	static void WriteBinary( FILE* fp , const PointStreamNormal& p )
+	{
+		for( int i=0 ; i<Dim ; i++ ){ const float f = (float)p.data[i] ; fwrite( &f , sizeof( float) , 1 , fp ); }
+	}
+	static const int PlyReadNum = Dim;
+	static const int PlyWriteNum = Dim;
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+	static bool ValidPlyReadProperties( const bool* flags ){ for( int d=0 ; d<Dim ; d++ ) if( !flags[d] ) return false ; return true ; }
+protected:
+	static const PlyProperty _PlyProperties[];
+};
+template<> // PLY property table for 2D float normals: "nx","ny" mapped to data.coords[0..1]
+const PlyProperty PointStreamNormal< float , 2 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 2D double normals; NOTE(review): PLY_FLOAT / PLY_DOUBLE are presumably the file-side and memory-side types -- confirm against PlyProperty
+const PlyProperty PointStreamNormal< double , 2 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 3D float normals: "nx","ny","nz" mapped to data.coords[0..2]
+const PlyProperty PointStreamNormal< float , 3 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nz" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 3D double normals
+const PlyProperty PointStreamNormal< double , 3 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nz" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 4D float normals: "nx","ny","nz","nw" mapped to data.coords[0..3]
+const PlyProperty PointStreamNormal< float , 4 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nz" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nw" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamNormal , data.coords[3] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<> // 4D double normals
+const PlyProperty PointStreamNormal< double , 4 >::_PlyProperties[] =
+{
+	PlyProperty( "nx" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "ny" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nz" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "nw" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamNormal , data.coords[3] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+
+template< class Real >
+struct PointStreamColor : public PointStreamData< Real , Color< Real > >
+{	// Three-channel color payload; channels live in data[0..2] and are exchanged with files as 8-bit values.
+	// Identity transform: constructible from anything, returns its argument unchanged.
+	struct Transform
+	{
+		Transform( void ){}
+		template< typename X > Transform( const X& ){}
+		PointStreamColor operator() ( const PointStreamColor& c ) const { return c; }
+	};
+	// Reads three whitespace-separated decimal channel values into p; calls ERROR_OUT if fewer than three are parsed.
+	// Decimal text (not raw characters) is consumed so that what WriteASCII emits with " %d %d %d " reads back unchanged.
+	static void ReadASCII( FILE* fp , PointStreamColor& p )
+	{
+		int c[3];
+		if( fscanf( fp , " %d %d %d " , &c[0] , &c[1] , &c[2] )!=3 ) ERROR_OUT( "Failed to read color" );
+		p.data[0] = (Real)c[0] , p.data[1] = (Real)c[1] , p.data[2] = (Real)c[2];
+	}
+	// Reads three raw bytes, one per channel, into p; calls ERROR_OUT on a short read.
+	static void ReadBinary( FILE* fp , PointStreamColor& p )
+	{
+		unsigned char c[3];
+		if( fread( c , sizeof(unsigned char) , 3 , fp )!=3 ) ERROR_OUT( "Failed to read color" );
+		p.data[0] = (Real)c[0] , p.data[1] = (Real)c[1] , p.data[2] = (Real)c[2];
+	}
+	// Converts p.data to bytes with SetColorValues and writes them as decimal integers.
+	static void WriteASCII( FILE* fp , const PointStreamColor& p )
+	{ unsigned char c[3] ; SetColorValues( p.data , c ) ; fprintf( fp , " %d %d %d " , c[0] , c[1] , c[2] ); }
+	// Converts p.data to bytes with SetColorValues and writes the three raw bytes.
+	static void WriteBinary( FILE* fp , const PointStreamColor& p )
+	{ unsigned char c[3] ; SetColorValues( p.data , c ) ; fwrite( c , sizeof(unsigned char) , 3 , fp ); }
+	// The property table holds six entries ("red"/"green"/"blue" then "r"/"g"/"b"); all are accepted on read, the first three are written.
+	static const int PlyReadNum = 6;
+	static const int PlyWriteNum = 3;
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+	// Valid when every channel d was found under at least one of its two names ( flags[d] or flags[d+3] ).
+	static bool ValidPlyReadProperties( const bool* flags ){ for( int d=0 ; d<3 ; d++ ) if( !flags[d] && !flags[d+3] ) return false ; return true ; }
+protected:
+	static const PlyProperty _PlyProperties[];
+};
+template<>	// PLY property tables for PointStreamColor; the long and short channel names share the same offsets
+const PlyProperty PointStreamColor< float >::_PlyProperties[] =
+{	// Real=float: entries 0-2 are "red"/"green"/"blue" , entries 3-5 are the aliases "r"/"g"/"b" for data.coords[0..2]
+	PlyProperty( "red"   , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "green" , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "blue"  , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "r"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "g"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "b"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( PointStreamColor , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+template<>
+const PlyProperty PointStreamColor< double >::_PlyProperties[] =
+{	// Real=double: same six names; only the third argument becomes PLY_DOUBLE
+	PlyProperty( "red"   , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) , 
+	PlyProperty( "green" , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "blue"  , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "r"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[0] ) ) , 0 , 0 , 0 , 0 ) , 
+	PlyProperty( "g"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[1] ) ) , 0 , 0 , 0 , 0 ) ,
+	PlyProperty( "b"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( PointStreamColor , data.coords[2] ) ) , 0 , 0 , 0 , 0 ) ,
+};
+
+template< class Real >
+struct PointStreamValue : public PointStreamData< Real , Real >
+{	// Single scalar payload stored in data.
+	struct Transform	// Identity transform: constructible from anything, returns its argument unchanged.
+	{
+		Transform( void ){}
+		template< typename X > Transform( const X& ){}
+		PointStreamValue operator() ( const PointStreamValue& r ) const { return r; }
+	};
+	// All four routines pass the value through a float temporary, whatever Real is; WriteBinary therefore writes sizeof(float) bytes, matching ReadBinary.
+	static void  ReadASCII ( FILE* fp , PointStreamValue& p ){ float f ; if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read value" ) ; p.data = (Real)f; }
+	static void  ReadBinary( FILE* fp , PointStreamValue& p ){ float f ; if( fread( &f , sizeof(float) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read value" ) ; p.data = (Real)f; }
+	static void WriteASCII ( FILE* fp , const PointStreamValue& p ){ float f = (float)p.data ; fprintf( fp , " %f " , f ); }
+	static void WriteBinary( FILE* fp , const PointStreamValue& p ){ float f = (float)p.data ; fwrite( &f , sizeof(float) , 1 , fp ); }
+	static const int PlyReadNum = 1 , PlyWriteNum = 1;
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+	static bool ValidPlyReadProperties( const bool* flags ){ return flags[0]; }	// valid only when the single property was found
+public:
+	static const PlyProperty _PlyProperties[];
+};
+template<>	// PLY property tables for PointStreamValue: a single "value" entry located at the data member
+const PlyProperty PointStreamValue< float >::_PlyProperties[] =
+{	// Real=float
+	PlyProperty( "value" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamValue , data ) ) , 0 , 0 , 0 , 0 ) , 
+};
+template<>
+const PlyProperty PointStreamValue< double >::_PlyProperties[] =
+{	// Real=double: only the third argument becomes PLY_DOUBLE
+	PlyProperty( "value" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamValue , data ) ) , 0 , 0 , 0 , 0 ) , 
+};
+
+template< class Real >
+struct PointStreamRoughness : public PointStreamData< Real , Real >
+{	// Single scalar payload stored in data.
+	struct Transform	// Identity transform: constructible from anything, returns its argument unchanged.
+	{
+		Transform( void ){}
+		template< typename X > Transform( const X& ){}
+		PointStreamRoughness operator() ( const PointStreamRoughness& r ) const { return r; }
+	};
+	// All four routines pass the value through a float temporary, whatever Real is; WriteBinary therefore writes sizeof(float) bytes, matching ReadBinary.
+	static void  ReadASCII ( FILE* fp , PointStreamRoughness& p ){ float f ; if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read roughness" ) ; p.data = (Real)f; }
+	static void  ReadBinary( FILE* fp , PointStreamRoughness& p ){ float f ; if( fread( &f , sizeof(float) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read roughness" ) ; p.data = (Real)f; }
+	static void WriteASCII ( FILE* fp , const PointStreamRoughness& p ){ float f = (float)p.data ; fprintf( fp , " %f " , f ); }
+	static void WriteBinary( FILE* fp , const PointStreamRoughness& p ){ float f = (float)p.data ; fwrite( &f , sizeof(float) , 1 , fp ); }
+	static const int PlyReadNum = 1 , PlyWriteNum = 1;
+	static const PlyProperty* PlyReadProperties( void ){ return _PlyProperties; }
+	static const PlyProperty* PlyWriteProperties( void ){ return _PlyProperties; }
+	static bool ValidPlyReadProperties( const bool* flags ){ return flags[0]; }	// valid only when the single property was found
+public:
+	static const PlyProperty _PlyProperties[];
+};
+template<>	// PLY property tables for PointStreamRoughness: a single "rg" entry located at the data member
+const PlyProperty PointStreamRoughness< float >::_PlyProperties[] =
+{	// Real=float
+	PlyProperty( "rg" , PLY_FLOAT , PLY_FLOAT , int( offsetof( PointStreamRoughness , data ) ) , 0 , 0 , 0 , 0 ) , 
+};
+template<>
+const PlyProperty PointStreamRoughness< double >::_PlyProperties[] =
+{	// Real=double: only the third argument becomes PLY_DOUBLE
+	PlyProperty( "rg" , PLY_FLOAT , PLY_DOUBLE , int( offsetof( PointStreamRoughness , data ) ) , 0 , 0 , 0 , 0 ) , 
+};
+
+template< typename Real , typename ... Data >
+struct MultiPointStreamData : public PointStreamData< Real , std::tuple< Data ... > >
+{	// Bundles several payload types into one std::tuple and forwards I/O, arithmetic and PLY-property queries to each element in tuple order.
+	typedef std::tuple< Data ... > MultiData;
+	using PointStreamData< Real , MultiData >::data;
+	template< unsigned int I > using DataType = typename std::tuple_element< I , MultiData >::type;
+	// Composite transform: holds one Data::Transform per element and applies each to the matching tuple slot.
+	struct Transform
+	{
+		Transform( void ){}
+		template< typename X >
+		Transform( const X& x ){ _initTransforms<0>( x ); }	// every per-element transform is constructed from the same x
+		MultiPointStreamData operator() ( const MultiPointStreamData& d ) const
+		{
+			MultiPointStreamData _d;
+			_transform<0>( d , _d );
+			return _d;
+		}
+	protected:
+		typedef std::tuple< typename Data::Transform ... > Transforms;
+		template< unsigned int I > using TransformType = typename std::tuple_element< I , Transforms >::type;
+		Transforms _xForms;
+	private:	// compile-time recursion over I; the I==sizeof...(Data) overloads end it
+		template< unsigned int I , typename X >
+		typename std::enable_if< I!=sizeof...(Data) >::type _initTransforms( const X& x ){ std::get< I >( _xForms ) = TransformType< I >( x ) ; _initTransforms< I+1 >( x ); }
+		template< unsigned int I , typename X >
+		typename std::enable_if< I==sizeof...(Data) >::type _initTransforms( const X& x ){ }
+		template< unsigned int I >
+		typename std::enable_if< I!=sizeof...(Data) >::type _transform( const MultiPointStreamData& in , MultiPointStreamData& out ) const { std::get< I >( out.data ) = std::get< I >( _xForms )( std::get< I >( in.data ) ) ; _transform< I+1 >( in , out ); }
+		template< unsigned int I >
+		typename std::enable_if< I==sizeof...(Data) >::type _transform( const MultiPointStreamData& in , MultiPointStreamData& out ) const { }
+	};
+	// Stream I/O: each element is read or written in tuple order by its own Read*/Write* routine.
+	static void  ReadASCII ( FILE* fp , MultiPointStreamData& p ){ p._readASCII <0>( fp ); }
+	static void  ReadBinary( FILE* fp , MultiPointStreamData& p ){ p._readBinary<0>( fp ); }
+	static void WriteASCII ( FILE* fp , const MultiPointStreamData& p ){ p._writeASCII <0>( fp ); }
+	static void WriteBinary( FILE* fp , const MultiPointStreamData& p ){ p._writeBinary<0>( fp ); }
+	// Element-wise arithmetic; the scalar forms apply s to every element.
+	MultiPointStreamData& operator += ( const MultiPointStreamData& p ){ _add<0>( p ) ; return *this; }
+	MultiPointStreamData& operator -= ( const MultiPointStreamData& p ){ _sub<0>( p ) ; return *this; }
+	MultiPointStreamData& operator *= ( Real s )                       { _mul<0>( s ) ; return *this; }
+	MultiPointStreamData& operator /= ( Real s )                       { _div<0>( s ) ; return *this; }
+	MultiPointStreamData  operator +  ( const MultiPointStreamData& p ) const { MultiPointStreamData _p = *this ; _p += p ; return _p; }
+	MultiPointStreamData  operator -  ( const MultiPointStreamData& p ) const { MultiPointStreamData _p = *this ; _p -= p ; return _p; }
+	MultiPointStreamData  operator *  ( Real s )                        const { MultiPointStreamData _p = *this ; _p *= s ; return _p; }
+	MultiPointStreamData  operator /  ( Real s )                        const { MultiPointStreamData _p = *this ; _p /= s ; return _p; }
+	// Total property counts: the sums of the elements' PlyReadNum / PlyWriteNum, computed by constexpr recursion.
+private:
+	template< unsigned int I > static constexpr typename std::enable_if< I!=sizeof...(Data) , int >::type _PlyTotalReadNum( void ){ return DataType< I >::PlyReadNum + _PlyTotalReadNum< I+1 >(); }
+	template< unsigned int I > static constexpr typename std::enable_if< I==sizeof...(Data) , int >::type _PlyTotalReadNum( void ){ return 0; }
+	template< unsigned int I > static constexpr typename std::enable_if< I!=sizeof...(Data) , int >::type _PlyTotalWriteNum( void ){ return DataType< I >::PlyWriteNum + _PlyTotalWriteNum< I+1 >(); }
+	template< unsigned int I > static constexpr typename std::enable_if< I==sizeof...(Data) , int >::type _PlyTotalWriteNum( void ){ return 0; }
+public:
+	static const int PlyReadNum = _PlyTotalReadNum<0>();
+	static const int PlyWriteNum = _PlyTotalWriteNum<0>();
+	static PlyProperty* PlyReadProperties( void ){ _SetPlyReadProperties<0>( _PlyReadProperties ) ; return _PlyReadProperties; }
+	static PlyProperty* PlyWriteProperties( void ){ _SetPlyWriteProperties<0>( _PlyWriteProperties ) ; return _PlyWriteProperties; }
+	// NOTE: the two accessors above refill the shared static table on every call before returning it. Below: validate all elements, or element I alone.
+	static bool ValidPlyReadProperties( const bool* flags ){ return _ValidPlyReadProperties<0>( flags ) ; }
+	template< unsigned int I > static bool ValidPlyReadProperties( const bool* flags ){ return DataType< I >::ValidPlyReadProperties( flags + _PlyReadOffset< I >() ); }
+protected:
+	static PlyProperty _PlyReadProperties[];
+	static PlyProperty _PlyWriteProperties[];
+private:
+	// Gives the offset to the I-th element
+	template< unsigned int I > static typename std::enable_if< I==0 , unsigned int >::type _PlyReadOffset( void ){ return 0; }
+	template< unsigned int I > static typename std::enable_if< I!=0 , unsigned int >::type _PlyReadOffset( void ){ return DataType< I-1 >::PlyReadNum + _PlyReadOffset< I-1 >(); }
+	// Recursive per-element I/O helpers; each handles element I and recurses to I+1 until I==sizeof...(Data).
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type  _readASCII ( FILE* fp )       { DataType< I >:: ReadASCII ( fp , std::get< I >( data ) ) ;  _readASCII < I+1 >( fp ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type  _readASCII ( FILE* fp )       { }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type  _readBinary( FILE* fp )       { DataType< I >:: ReadBinary( fp , std::get< I >( data ) ) ;  _readBinary< I+1 >( fp ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type  _readBinary( FILE* fp )       { }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _writeASCII ( FILE* fp ) const { DataType< I >::WriteASCII ( fp , std::get< I >( data ) ) ; _writeASCII < I+1 >( fp ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _writeASCII ( FILE* fp ) const { }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _writeBinary( FILE* fp ) const { DataType< I >::WriteBinary( fp , std::get< I >( data ) ) ; _writeBinary< I+1 >( fp ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _writeBinary( FILE* fp ) const { }
+	// Recursive per-element arithmetic helpers backing the compound-assignment operators.
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _add( const MultiPointStreamData& p ){ std::get< I >( data ) += std::get< I >( p.data ) ; _add< I+1 >( p ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _add( const MultiPointStreamData& p ){ }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _sub( const MultiPointStreamData& p ){ std::get< I >( data ) -= std::get< I >( p.data ) ; _sub< I+1 >( p ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _sub( const MultiPointStreamData& p ){ }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _mul( Real s ){ std::get< I >( data ) *= s ; _mul< I+1 >( s ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _mul( Real s ){ }
+	template< unsigned int I > typename std::enable_if< I!=sizeof...(Data) >::type _div( Real s ){ std::get< I >( data ) /= s ; _div< I+1 >( s ); }
+	template< unsigned int I > typename std::enable_if< I==sizeof...(Data) >::type _div( Real s ){ }
+	// Copies element I's property descriptors into the merged table and shifts each offset by that element's address offset inside a MultiPointStreamData.
+	template< unsigned int I > static typename std::enable_if< I!=sizeof...(Data) >::type _SetPlyReadProperties( PlyProperty* PlyReadProperties )
+	{
+		for( int d=0 ; d<DataType< I >::PlyReadNum ; d++ )
+		{
+			PlyReadProperties[d] = DataType< I >::PlyReadProperties()[d];
+			MultiPointStreamData temp;	// scratch object used only to measure where tuple element I sits
+			const typename std::tuple_element< I , MultiData >::type& temp_data = std::get< I >( temp.data );
+			PlyReadProperties[d].offset += (int)( (size_t)&temp_data - (size_t)&temp );
+		}
+		_SetPlyReadProperties< I+1 >( PlyReadProperties + DataType< I >::PlyReadNum );
+	}
+	template< unsigned int I > static typename std::enable_if< I==sizeof...(Data) >::type _SetPlyReadProperties( PlyProperty* PlyReadProperties ){ }
+	template< unsigned int I > static typename std::enable_if< I!=sizeof...(Data) >::type _SetPlyWriteProperties( PlyProperty* PlyWriteProperties )
+	{
+		for( int d=0 ; d<DataType< I >::PlyWriteNum ; d++ )
+		{
+			PlyWriteProperties[d] = DataType< I >::PlyWriteProperties()[d];
+			MultiPointStreamData temp;	// scratch object used only to measure where tuple element I sits
+			const typename std::tuple_element< I , MultiData >::type& temp_data = std::get< I >( temp.data );
+			PlyWriteProperties[d].offset += (int)( (size_t)&temp_data - (size_t)&temp );
+		}
+		_SetPlyWriteProperties< I+1 >( PlyWriteProperties + DataType< I >::PlyWriteNum );
+	}
+	template< unsigned int I > static typename std::enable_if< I==sizeof...(Data) >::type _SetPlyWriteProperties( PlyProperty* PlyWriteProperties ){ }
+	// Validates element I against the front of flags, then advances flags by that element's PlyReadNum for the next element.
+	template< unsigned int I > static typename std::enable_if< I!=sizeof...(Data) , bool >::type _ValidPlyReadProperties( const bool* flags ){ return DataType< I >::ValidPlyReadProperties( flags ) && _ValidPlyReadProperties< I+1 >( flags + std::tuple_element< I , MultiData >::type::PlyReadNum ); }
+	template< unsigned int I > static typename std::enable_if< I==sizeof...(Data) , bool >::type _ValidPlyReadProperties( const bool* flags ){ return true; }
+};
+template< typename Real , typename ... Data > PlyProperty MultiPointStreamData< Real , Data ... >::_PlyReadProperties[ MultiPointStreamData< Real , Data ... >::PlyReadNum==0 ? 1 : MultiPointStreamData< Real , Data ... >::PlyReadNum ];	// storage for the merged read table; sized to at least 1 so the array is never zero-length
+template< typename Real , typename ... Data > PlyProperty MultiPointStreamData< Real , Data ... >::_PlyWriteProperties[ MultiPointStreamData< Real , Data ... >::PlyWriteNum==0 ? 1 : MultiPointStreamData< Real , Data ... >::PlyWriteNum ];	// storage for the merged write table; same minimum size of 1
+
+#endif // POINT_STREAM_DATA_INCLUDED
\ No newline at end of file
diff --git a/Src/PoissonRecon.cpp b/Src/PoissonRecon.cpp
index 8e4b71f..d6a194d 100644
--- a/Src/PoissonRecon.cpp
+++ b/Src/PoissonRecon.cpp
@@ -26,287 +26,159 @@ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF S
 DAMAGE.
 */
 
-#undef FAST_COMPILE
-#undef ARRAY_DEBUG
-#define BRUNO_LEVY_FIX
-#define FOR_RELEASE
+#undef SHOW_WARNINGS							// Display compilation warnings
+#undef USE_DOUBLE								// If enabled, double-precision is used
+#undef FAST_COMPILE								// If enabled, only a single version of the reconstruction code is compiled
+#undef ARRAY_DEBUG								// If enabled, array access is tested for validity
+#define DATA_DEGREE 0							// The order of the B-Spline used to splat in data for color interpolation
+												// This can be changed to zero if more interpolatory performance is desired.
+#define WEIGHT_DEGREE 2							// The order of the B-Spline used to splat in the weights for density estimation
+#define NORMAL_DEGREE 2							// The order of the B-Spline used to splat in the normals for constructing the Laplacian constraints
+#define DEFAULT_FEM_DEGREE 1					// The default finite-element degree
+#define DEFAULT_FEM_BOUNDARY BOUNDARY_NEUMANN	// The default finite-element boundary type
+#define DIMENSION 3								// The dimension of the system
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
-#if defined( _WIN32 ) || defined( _WIN64 )
-#include <Windows.h>
-#include <Psapi.h>
-#endif // _WIN32 || _WIN64
-#include "MyTime.h"
-#include "MarchingCubes.h"
-#include "Octree.h"
-#include "SparseMatrix.h"
+#include "MyMiscellany.h"
 #include "CmdLineParser.h"
 #include "PPolynomial.h"
+#include "FEMTree.h"
 #include "Ply.h"
-#include "MemoryUsage.h"
-#ifdef _OPENMP
-#include "omp.h"
-#endif // _OPENMP
-void DumpOutput( const char* format , ... );
-void DumpOutput2( std::vector< char* >& comments , const char* format , ... );
-#include "MultiGridOctreeData.h"
-
-#define DEFAULT_FULL_DEPTH 5
-
-#define XSTR(x) STR(x)
-#define STR(x) #x
-#if DEFAULT_FULL_DEPTH
-#pragma message ( "[WARNING] Setting default full depth to " XSTR(DEFAULT_FULL_DEPTH) )
-#endif // DEFAULT_FULL_DEPTH
-
-#include <stdarg.h>
-char* outputFile=NULL;
-int echoStdout=0;
-void DumpOutput( const char* format , ... )
-{
-	if( outputFile )
-	{
-		FILE* fp = fopen( outputFile , "a" );
-		va_list args;
-		va_start( args , format );
-		vfprintf( fp , format , args );
-		fclose( fp );
-		va_end( args );
-	}
-	if( echoStdout )
-	{
-		va_list args;
-		va_start( args , format );
-		vprintf( format , args );
-		va_end( args );
-	}
-}
-void DumpOutput2( std::vector< char* >& comments  , const char* format , ... )
-{
-	if( outputFile )
-	{
-		FILE* fp = fopen( outputFile , "a" );
-		va_list args;
-		va_start( args , format );
-		vfprintf( fp , format , args );
-		fclose( fp );
-		va_end( args );
-	}
-	if( echoStdout )
-	{
-		va_list args;
-		va_start( args , format );
-		vprintf( format , args );
-		va_end( args );
-	}
-	comments.push_back( new char[1024] );
-	char* str = comments.back();
-	va_list args;
-	va_start( args , format );
-	vsprintf( str , format , args );
-	va_end( args );
-	if( str[strlen(str)-1]=='\n' ) str[strlen(str)-1] = 0;
-}
+#include "PointStreamData.h"
+#include "Image.h"
 
+MessageWriter messageWriter;
 
-cmdLineString
+const float DefaultPointWeightMultiplier = 2.f;	// ShowUsage reports the default point weight as this multiplier times the B-spline degree
+
+cmdLineParameter< char* >	// String-valued command-line options; each is constructed with its switch name
 	In( "in" ) ,
 	Out( "out" ) ,
 	TempDir( "tempDir" ) ,
-	VoxelGrid( "voxel" ) ,
-	XForm( "xForm" );
+	Grid( "grid" ) ,	// described by ShowUsage as the output grid
+	Tree( "tree" ) ,	// described by ShowUsage as the output FEM tree
+	Transform( "xForm" );	// keeps the "xForm" switch name
 
 cmdLineReadable
-#if defined( _WIN32 ) || defined( _WIN64 )
 	Performance( "performance" ) ,
-#endif // _WIN32 || _WIN64
 	ShowResidual( "showResidual" ) ,
 	NoComments( "noComments" ) ,
 	PolygonMesh( "polygonMesh" ) ,
-	Confidence( "confidence" ) ,
-	NormalWeights( "nWeights" ) ,
 	NonManifold( "nonManifold" ) ,
 	ASCII( "ascii" ) ,
 	Density( "density" ) ,
 	LinearFit( "linearFit" ) ,
-	PrimalVoxel( "primalVoxel" ) ,
-#ifndef FAST_COMPILE
-	Double( "double" ) ,
-#endif // !FAST_COMPILE
+	PrimalGrid( "primalGrid" ) ,	// the switches added here are all printed by ShowUsage without an argument
+	ExactInterpolation( "exact" ) ,
+	Normals( "normals" ) ,
+	Colors( "colors" ) ,
+	InCore( "inCore" ) ,
 	Verbose( "verbose" );
 
-cmdLineInt
+cmdLineParameter< int >	// Integer-valued options; ShowUsage prints each option's .value as its default
 #ifndef FAST_COMPILE
-	Degree( "degree" , 2 ) ,
+	Degree( "degree" , DEFAULT_FEM_DEGREE ) ,
 #endif // !FAST_COMPILE
 	Depth( "depth" , 8 ) ,
-	CGDepth( "cgDepth" , 0 ) ,
 	KernelDepth( "kernelDepth" ) ,
-	AdaptiveExponent( "adaptiveExp" , 1 ) ,
 	Iters( "iters" , 8 ) ,
-	VoxelDepth( "voxelDepth" , -1 ) ,
-	FullDepth( "fullDepth" , DEFAULT_FULL_DEPTH ) ,
+	FullDepth( "fullDepth" , 5 ) ,
+	BaseDepth( "baseDepth" , 0 ) ,	// "coarse MG solver depth" in ShowUsage
+	BaseVCycles( "baseVCycles" , 1 ) ,	// "coarse MG solver v-cycles" in ShowUsage
 #ifndef FAST_COMPILE
-	BType( "bType" , BOUNDARY_NEUMANN+1 ) ,
+	BType( "bType" , DEFAULT_FEM_BOUNDARY+1 ) ,	// +1 because ShowUsage numbers the boundary names starting from 1
 #endif // !FAST_COMPILE
-	MaxSolveDepth( "maxSolveDepth" ) ,
+	MaxMemoryGB( "maxMemory" , 0 ) ,	// "maximum memory (in GB)" in ShowUsage
 	Threads( "threads" , omp_get_num_procs() );
 
-cmdLineFloat
-	Color( "color" , 16.f ) ,
+cmdLineParameter< float >	// Floating-point options; ShowUsage prints each option's .value as its default where one is shown
+	DataX( "data" , 32.f ) ,	// "pull factor" in ShowUsage
 	SamplesPerNode( "samplesPerNode" , 1.5f ) ,
 	Scale( "scale" , 1.1f ) ,
-	CGSolverAccuracy( "cgAccuracy" , float(1e-3) ) ,
-	LowResIterMultiplier( "iterMultiplier" , 1.f ) , 
-	PointWeight( "pointWeight" , 4.f );
-
+	Width( "width" , 0.f ) ,	// "grid width" in ShowUsage
+	Confidence( "confidence" , 0.f ) ,	// "normal confidence exponent" in ShowUsage
+	ConfidenceBias( "confidenceBias" , 0.f ) ,	// "normal confidence bias exponent" in ShowUsage
+	CGSolverAccuracy( "cgAccuracy" , 1e-3f ) ,
+	PointWeight( "pointWeight" );	// no literal default here; ShowUsage reports DefaultPointWeightMultiplier * <b-spline degree>
 
 cmdLineReadable* params[] =
 {
 #ifndef FAST_COMPILE
-	&Degree , &Double , &BType ,
+	&Degree , &BType ,	// declared only when FAST_COMPILE is not defined
 #endif // !FAST_COMPILE
-	&In , &Depth , &Out , &XForm ,
-	&Scale , &Verbose , &CGSolverAccuracy , &NoComments , &LowResIterMultiplier ,
-	&KernelDepth , &SamplesPerNode , &Confidence , &NormalWeights , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual , &VoxelDepth ,
-	&PointWeight , &VoxelGrid , &Threads , &MaxSolveDepth ,
-	&AdaptiveExponent ,
+	&In , &Depth , &Out , &Transform ,
+	&Width ,
+	&Scale , &Verbose , &CGSolverAccuracy , &NoComments ,
+	&KernelDepth , &SamplesPerNode , &Confidence , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual ,
+	&ConfidenceBias ,
+	&BaseDepth , &BaseVCycles ,
+	&PointWeight ,
+	&Grid , &Threads ,
+	&Tree ,
 	&Density ,
 	&FullDepth ,
-	&CGDepth , &Iters ,
-	&Color ,
+	&Iters ,
+	&DataX ,
+	&Colors ,
+	&Normals ,
 	&LinearFit ,
-	&PrimalVoxel ,
+	&PrimalGrid ,
 	&TempDir ,
-#if defined( _WIN32 ) || defined( _WIN64 )
+	&ExactInterpolation ,
 	&Performance ,
-#endif // _WIN32 || _WIN64
+	&MaxMemoryGB ,
+	&InCore ,
+	NULL	// end-of-list marker; presumably what the command-line parser scans for -- confirm against CmdLineParser
 };
 
-
 void ShowUsage(char* ex)
 {
 	printf( "Usage: %s\n" , ex );
 	printf( "\t --%s <input points>\n" , In.name );
-
-	printf( "\t[--%s <ouput triangle mesh>]\n" , Out.name );
+	printf( "\t[--%s <output triangle mesh>]\n" , Out.name );	// each line prints an option's name and, where one is shown, its current .value as the default
-
-	printf( "\t[--%s <ouput voxel grid>]\n" , VoxelGrid.name );
-
+	printf( "\t[--%s <output grid>]\n" , Grid.name );
+	printf( "\t[--%s <output fem tree>]\n" , Tree.name );
 #ifndef FAST_COMPILE
 	printf( "\t[--%s <b-spline degree>=%d]\n" , Degree.name , Degree.value );
-
 	printf( "\t[--%s <boundary type>=%d]\n" , BType.name , BType.value );
 	for( int i=0 ; i<BOUNDARY_COUNT ; i++ ) printf( "\t\t%d] %s\n" , i+1 , BoundaryNames[i] );
-#endif // FAST_COMPILE
-
+#endif // !FAST_COMPILE
 	printf( "\t[--%s <maximum reconstruction depth>=%d]\n" , Depth.name , Depth.value );
-
+	printf( "\t[--%s <grid width>]\n" , Width.name );
+	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
+	printf( "\t[--%s <coarse MG solver depth>=%d]\n" , BaseDepth.name , BaseDepth.value );
+	printf( "\t[--%s <coarse MG solver v-cycles>=%d]\n" , BaseVCycles.name , BaseVCycles.value );
 	printf( "\t[--%s <scale factor>=%f]\n" , Scale.name , Scale.value );
-
 	printf( "\t[--%s <minimum number of samples per node>=%f]\n" , SamplesPerNode.name, SamplesPerNode.value );
-
-	printf( "\t[--%s <interpolation weight>=%.3e]\n" , PointWeight.name , PointWeight.value );
-
-	printf( "\t[--%s]\n" , Confidence.name );
-
-	printf( "\t[--%s]\n" , NormalWeights.name );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <adaptive weighting exponent>=%d]\n", AdaptiveExponent.name , AdaptiveExponent.value );
-#endif // !FOR_RELEASE
-
+	printf( "\t[--%s <interpolation weight>=%.3e * <b-spline degree>]\n" , PointWeight.name , DefaultPointWeightMultiplier );	// default shown as a multiplier of the degree, not as PointWeight.value
 	printf( "\t[--%s <iterations>=%d]\n" , Iters.name , Iters.value );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <low-resolution iteration multiplier>=%f]\n" , LowResIterMultiplier.name , LowResIterMultiplier.value );
-#endif // FOR_RELEASE
-
-	printf( "\t[--%s <conjugate-gradients depth>=%d]\n" , CGDepth.name , CGDepth.value );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <conjugate-gradients solver accuracy>=%g]\n" , CGSolverAccuracy.name , CGSolverAccuracy.value );
-#endif // !FOR_RELEASE
-
-	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
-
-	printf( "\t[--%s <depth at which to extract the voxel grid>=<%s>]\n" , VoxelDepth.name , Depth.name );
-
-	printf( "\t[--%s]\n" , PrimalVoxel.name );
-
-	printf( "\t[--%s <pull factor>]\n" , Color.name );
-
-	printf( "\t[--%s]\n" , Density.name );
-
-	printf( "\t[--%s]\n" , LinearFit.name );
-
-	printf( "\t[--%s]\n" , PolygonMesh.name);
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s]\n" , NonManifold.name );
-#endif // !FOR_RELEASE
-
+	printf( "\t[--%s]\n" , ExactInterpolation.name );
+	printf( "\t[--%s <pull factor>=%f]\n" , DataX.name , DataX.value );
+	printf( "\t[--%s]\n" , Colors.name );
+	printf( "\t[--%s]\n" , Normals.name );
 #ifdef _OPENMP
 	printf( "\t[--%s <num threads>=%d]\n" , Threads.name , Threads.value );
 #endif // _OPENMP
-
-	printf( "\t[--%s]\n" , TempDir.name );
-
-	printf( "\t[--%s]\n" , Verbose.name );
-
-#ifndef FOR_RELEASE
-#if defined( _WIN32 ) || defined( _WIN64 )
+	printf( "\t[--%s <normal confidence exponent>=%f]\n" , Confidence.name , Confidence.value );
+	printf( "\t[--%s <normal confidence bias exponent>=%f]\n" , ConfidenceBias.name , ConfidenceBias.value );
+	printf( "\t[--%s]\n" , NonManifold.name );
+	printf( "\t[--%s]\n" , PolygonMesh.name );
+	printf( "\t[--%s <cg solver accuracy>=%g]\n" , CGSolverAccuracy.name , CGSolverAccuracy.value );
+	printf( "\t[--%s <maximum memory (in GB)>=%d]\n" , MaxMemoryGB.name , MaxMemoryGB.value );
 	printf( "\t[--%s]\n" , Performance.name );
-#endif // _WIN32 || _WIN64
-#endif // !FOR_RELEASE
-
-#ifndef FOR_RELEASE
+	printf( "\t[--%s]\n" , Density.name );
+	printf( "\t[--%s]\n" , LinearFit.name );
+	printf( "\t[--%s]\n" , PrimalGrid.name );
 	printf( "\t[--%s]\n" , ASCII.name );
-	
 	printf( "\t[--%s]\n" , NoComments.name );
-
-#ifndef FAST_COMPILE
-	printf( "\t[--%s]\n" , Double.name );
-#endif // FAST_COMPILE
-#endif // !FOR_RELEASE
+	printf( "\t[--%s]\n" , TempDir.name );
+	printf( "\t[--%s]\n" , InCore.name );
+	printf( "\t[--%s]\n" , Verbose.name );
 }
 
-template< class Real >
-struct ColorInfo
-{
-	static Point3D< Real > ReadASCII( FILE* fp )
-	{
-		Point3D< unsigned char > c;
-		if( fscanf( fp , " %c %c %c " , &c[0] , &c[1] , &c[2] )!=3 ) fprintf( stderr , "[ERROR] Failed to read color\n" ) , exit( 0 );
-		return Point3D< Real >( (Real)c[0] , (Real)c[1] , (Real)c[2] );
-	};
-	static bool ValidPlyProperties( const bool* props ){ return ( props[0] || props[3] ) && ( props[1] || props[4] ) && ( props[2] || props[5] ); }
-	const static PlyProperty PlyProperties[];
-};
-template<>
-const PlyProperty ColorInfo< float >::PlyProperties[] =
-{
-	{ "r"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "g"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "b"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "red"   , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[0] ) ) , 0 , 0 , 0 , 0 } , 
-	{ "green" , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "blue"  , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[2] ) ) , 0 , 0 , 0 , 0 }
-};
-template<>
-const PlyProperty ColorInfo< double >::PlyProperties[] =
-{
-	{ "r"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "g"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "b"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "red"   , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[0] ) ) , 0 , 0 , 0 , 0 } , 
-	{ "green" , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "blue"  , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[2] ) ) , 0 , 0 , 0 , 0 }
-};
-
 double Weight( double v , double start , double end )
 {
 	v = ( v - start ) / ( end - start );
@@ -324,378 +196,533 @@ double Weight( double v , double start , double end )
 	}
 }
 
-#if defined( _WIN32 ) || defined( _WIN64 )
-double PeakMemoryUsageMB( void )
-{
-	HANDLE h = GetCurrentProcess();
-	PROCESS_MEMORY_COUNTERS pmc;
-	return GetProcessMemoryInfo( h , &pmc , sizeof(pmc) ) ? ( (double)pmc.PeakWorkingSetSize )/(1<<20) : 0;
-}
-#endif // _WIN32 || _WIN64
-
-
-template< class Real >
-struct OctreeProfiler
+template< unsigned int Dim , class Real >
+struct FEMTreeProfiler	// Reports elapsed time and memory statistics for a phase of work on an FEMTree< Dim , Real >
 {
-	Octree< Real >& tree;
+	FEMTree< Dim , Real >& tree;	// Bound at construction; the statistics below are read through static FEMTree members
 	double t;
 
-	OctreeProfiler( Octree< Real >& t ) : tree(t) { ; }
-	void start( void ){ t = Time() , tree.resetLocalMemoryUsage(); }
+	FEMTreeProfiler( FEMTree< Dim , Real >& t ) : tree(t) { ; }	// The parameter t is the tree (it shadows the timestamp member t)
+	void start( void ){ t = Time() , FEMTree< Dim , Real >::ResetLocalMemoryUsage(); }	// Store the current Time() in t and call ResetLocalMemoryUsage()
 	void print( const char* header ) const
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// NOTE(review): presumably refreshes the statistics printed below -- confirm in FEMTree
+		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );	// Columns: seconds since start() , local / max / process-peak memory
+		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );	// Same columns without the header prefix
 	}
 	void dumpOutput( const char* header ) const
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) DumpOutput( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         DumpOutput(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) DumpOutput( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         DumpOutput(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// Same report as print() , routed through messageWriter instead of printf
+		if( header ) messageWriter( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
+		else         messageWriter(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
 	}
-	void dumpOutput2( std::vector< char* >& comments , const char* header ) const
+	void dumpOutput2( std::vector< std::string >& comments , const char* header ) const	// Variant that also hands the comments vector to messageWriter
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) DumpOutput2( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         DumpOutput2( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) DumpOutput2( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         DumpOutput2( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// NOTE(review): messageWriter presumably records the formatted line in comments -- confirm
+		if( header ) messageWriter( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
+		else         messageWriter( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
 	}
 };
 
-template< class Real >
-XForm4x4< Real > GetPointXForm( OrientedPointStream< Real >& stream , Real scaleFactor )
+template< class Real , unsigned int Dim >	// Builds a transform from the axis-aligned box [min,max] and a scale factor
+XForm< Real , Dim+1 > GetBoundingBoxXForm( Point< Real , Dim > min , Point< Real , Dim > max , Real scaleFactor )
 {
-	Point3D< Real > min , max;
-	stream.boundingBox( min , max );
-	Point3D< Real > center = ( max + min ) / 2;
-	Real scale = std::max< Real >( max[0]-min[0] , std::max< Real >( max[1]-min[1] , max[2]-min[2] ) );
+	Point< Real , Dim > center = ( max + min ) / 2;	// Mid-point of the box
+	Real scale = max[0] - min[0];	// Extent along the first axis ...
+	for( int d=1 ; d<Dim ; d++ ) scale = std::max< Real >( scale , max[d]-min[d] );	// ... replaced by the largest extent over all axes
 	scale *= scaleFactor;
-	for( int i=0 ; i<3 ; i++ ) center[i] -= scale/2;
-	XForm4x4< Real > tXForm = XForm4x4< Real >::Identity() , sXForm = XForm4x4< Real >::Identity();
-	for( int i=0 ; i<3 ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(3,i) = -center[i];
+	for( int i=0 ; i<Dim ; i++ ) center[i] -= scale/2;	// center now holds the minimum corner of the cube of side scale centered on the box
+	XForm< Real , Dim+1 > tXForm = XForm< Real , Dim+1 >::Identity() , sXForm = XForm< Real , Dim+1 >::Identity();	// Translation and scaling parts , both starting from the identity
+	for( int i=0 ; i<Dim ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(Dim,i) = -center[i];	// NOTE(review): divides by scale -- a degenerate box ( scale==0 ) is not handled here
+	return sXForm * tXForm;	// Product of the scaling and the translation transforms
+}
+template< class Real , unsigned int Dim >	// Bounding-box transform whose tree depth is chosen from a target cell width ( depth is an output )
+XForm< Real , Dim+1 > GetBoundingBoxXForm( Point< Real , Dim > min , Point< Real , Dim > max , Real width , Real scaleFactor , int& depth )
+{
+	// Get the target resolution (along the largest dimension)
+	Real resolution = ( max[0]-min[0] ) / width;
+	for( int d=1 ; d<Dim ; d++ ) resolution = std::max< Real >( resolution , ( max[d]-min[d] ) / width );
+	resolution *= scaleFactor;
+	depth = 0;
+	while( (1<<depth)<resolution ){ if( depth>=30 ){ ERROR_OUT( "Width is too small for the bounding box: %g" , (double)width ); break; } depth++; }	// Smallest depth with (1<<depth)>=resolution; stop before (1<<31) overflows a signed int
+	// The cube has side (1<<depth)*width and is centered on the bounding box
+	Point< Real , Dim > center = ( max + min ) / 2;
+	Real scale = (1<<depth) * width;
+	// Move center to the cube's minimum corner , then translate by -center and scale by 1/scale
+	for( int i=0 ; i<Dim ; i++ ) center[i] -= scale/2;
+	XForm< Real , Dim+1 > tXForm = XForm< Real , Dim+1 >::Identity() , sXForm = XForm< Real , Dim+1 >::Identity();
+	for( int i=0 ; i<Dim ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(Dim,i) = -center[i];
 	return sXForm * tXForm;
 }
 
-template< class Real , int Degree , BoundaryType BType , class Vertex >
-int _Execute( int argc , char* argv[] )
+template< class Real , unsigned int Dim >	// Width-based transform for a point stream; depth is written by GetBoundingBoxXForm
+XForm< Real , Dim+1 > GetPointXForm( InputPointStream< Real , Dim >& stream , Real width , Real scaleFactor , int& depth )
 {
-	typedef typename Octree< Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
-	typedef typename Octree< Real >::template InterpolationInfo< false > InterpolationInfo;
-	typedef OrientedPointStream< Real > PointStream;
-	typedef OrientedPointStreamWithData< Real , Point3D< Real > > PointStreamWithData;
-	typedef TransformedOrientedPointStream< Real > XPointStream;
-	typedef TransformedOrientedPointStreamWithData< Real , Point3D< Real > > XPointStreamWithData;
-	Reset< Real >();
-	int paramNum = sizeof(params)/sizeof(cmdLineReadable*);
-	std::vector< char* > comments;
-
-	if( Verbose.set ) echoStdout=1;
-
-	XForm4x4< Real > xForm , iXForm;
-	if( XForm.set )
+	Point< Real , Dim > min , max;	// Filled in by the stream
+	stream.boundingBox( min , max );	// NOTE(review): presumably iterates over the stream to find its extent -- confirm in InputPointStream
+	return GetBoundingBoxXForm( min , max , width , scaleFactor , depth );	// Delegate to the bounding-box overload that takes a width
+}
+template< class Real , unsigned int Dim >	// Scale-factor-based transform for a point stream
+XForm< Real , Dim+1 > GetPointXForm( InputPointStream< Real , Dim >& stream , Real scaleFactor )
+{
+	Point< Real , Dim > min , max;	// Filled in by the stream
+	stream.boundingBox( min , max );	// Ask the stream for the corners of its bounding box
+	return GetBoundingBoxXForm( min , max , scaleFactor );	// Delegate to the bounding-box overload that takes only a scale factor
+}
+
+template< unsigned int Dim , typename Real >	// Functor yielding the weighted target value used for point constraints
+struct ConstraintDual
+{
+	Real target , weight;	// Both are fixed at construction
+	ConstraintDual( Real t , Real w ) : target(t) , weight(w){ }
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( const Point< Real , Dim >& p ) const { return CumulativeDerivativeValues< Real , Dim , 0 >( target*weight ); };	// p is not used: the result is built from target*weight for every point
+};
+template< unsigned int Dim , typename Real >	// Functor scaling derivative values by a fixed weight
+struct SystemDual
+{
+	Real weight;	// Fixed at construction
+	SystemDual( Real w ) : weight(w){ }
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( const Point< Real , Dim >& p , const CumulativeDerivativeValues< Real , Dim , 0 >& dValues ) const { return dValues * weight; };	// p is not used: returns dValues*weight
+	CumulativeDerivativeValues< double , Dim , 0 > operator()( const Point< Real , Dim >& p , const CumulativeDerivativeValues< double , Dim , 0 >& dValues ) const { return dValues * weight; };	// Same scaling for double-valued input
+};
+template< unsigned int Dim >	// Specialization for Real=double , where the two overloads of the generic template would share one signature
+struct SystemDual< Dim , double >
+{
+	typedef double Real;	// Lets the body below read like the generic template
+	Real weight;	// Fixed at construction
+	SystemDual( Real w ) : weight(w){ }
+	CumulativeDerivativeValues< Real , Dim , 0 > operator()( const Point< Real , Dim >& p , const CumulativeDerivativeValues< Real , Dim , 0 >& dValues ) const { return dValues * weight; };	// p is not used: returns dValues*weight
+};
+
+template< typename Vertex , typename Real , unsigned int ... FEMSigs , typename ... SampleData >	// Extracts the iso-surface of solution at isoValue and writes it as a PLY file to Out.value
+void ExtractMesh( UIntPack< FEMSigs ... > , std::tuple< SampleData ... > , FEMTree< sizeof ... ( FEMSigs ) , Real >& tree , const DenseNodeData< Real , UIntPack< FEMSigs ... > >& solution , Real isoValue , const std::vector< typename FEMTree< sizeof ... ( FEMSigs ) , Real >::PointSample >* samples , std::vector< MultiPointStreamData< Real , PointStreamNormal< Real , DIMENSION > , MultiPointStreamData< Real , SampleData ... > > >* sampleData , const typename FEMTree< sizeof ... ( FEMSigs ) , Real >::template DensityEstimator< WEIGHT_DEGREE >* density , std::function< void ( Vertex& , Point< Real , DIMENSION > , Real , MultiPointStreamData< Real , PointStreamNormal< Real , DIMENSION > , MultiPointStreamData< Real , SampleData ... > > ) > SetVertex , std::vector< std::string > &comments , XForm< Real , sizeof...(FEMSigs)+1 > iXForm )
+{
+	static const int Dim = sizeof ... ( FEMSigs );
+	typedef UIntPack< FEMSigs ... > Sigs;
+	typedef PointStreamNormal< Real , Dim > NormalPointSampleData;
+	typedef MultiPointStreamData< Real , SampleData ... > AdditionalPointSampleData;
+	typedef MultiPointStreamData< Real , NormalPointSampleData , AdditionalPointSampleData > TotalPointSampleData;
+	static const unsigned int DataSig = FEMDegreeAndBType< DATA_DEGREE , BOUNDARY_FREE >::Signature;
+	typedef typename FEMTree< Dim , Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
+	// The two unnamed leading parameters only carry template arguments ( FEMSigs and SampleData )
+	FEMTreeProfiler< Dim , Real > profiler( tree );
+	// Build the file-name prefix "<temp dir><separator>PR_" handed to the file-backed mesh; all writes are bounded by the buffer sizes
+	char tempHeader[1024];
 	{
-		FILE* fp = fopen( XForm.value , "r" );
+		char tempPath[1024];
+		tempPath[0] = 0;
+		if( TempDir.set ) snprintf( tempPath , sizeof(tempPath) , "%s" , TempDir.value );	// Truncates instead of overflowing on an over-long --tempDir value
+		else SetTempDirectory( tempPath , sizeof(tempPath) );
+		if( strlen(tempPath)==0 ) sprintf( tempPath , ".%c" , FileSeparator );	// Fall back to the current directory
+		if( tempPath[ strlen( tempPath )-1 ]==FileSeparator ) snprintf( tempHeader , sizeof(tempHeader) , "%sPR_" , tempPath );
+		else                                                  snprintf( tempHeader , sizeof(tempHeader) , "%s%cPR_" , tempPath , FileSeparator );
+	}
+	CoredMeshData< Vertex > *mesh;	// Released with delete at the end of the function
+	if( InCore.set ) mesh = new CoredVectorMeshData< Vertex >();
+	else             mesh = new CoredFileMeshData< Vertex >( tempHeader );
+	// Extract the iso-surface , with per-sample data when sampleData is given
+	profiler.start();
+	typename IsoSurfaceExtractor< Dim , Real , Vertex >::IsoStats isoStats;
+	if( sampleData )
+	{
+		SparseNodeData< ProjectiveData< TotalPointSampleData , Real > , IsotropicUIntPack< Dim , DataSig > > _sampleData = tree.template setDataField< DataSig , false >( *samples , *sampleData , (DensityEstimator*)NULL );
+		for( const RegularTreeNode< Dim , FEMTreeNodeData >* n = tree.tree().nextNode() ; n ; n=tree.tree().nextNode( n ) )
+		{
+			ProjectiveData< TotalPointSampleData , Real >* clr = _sampleData( n );
+			if( clr ) (*clr) *= (Real)pow( DataX.value , tree.depth( n ) );	// Scale each node's data by DataX^depth
+		}
+		isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , &_sampleData , solution , isoValue , *mesh , SetVertex , !LinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+	}
+#if defined( __GNUC__ ) && __GNUC__ < 5
+	#warning "you've got me gcc version<5"
+	else isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , (SparseNodeData< ProjectiveData< TotalPointSampleData , Real > , IsotropicUIntPack< Dim , DataSig > > *)NULL , solution , isoValue , *mesh , SetVertex , !LinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+#else // !__GNUC__ || __GNUC__ >=5
+	else isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , NULL , solution , isoValue , *mesh , SetVertex , !LinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+#endif // __GNUC__ && __GNUC__ < 5
+	messageWriter( "Vertices / Polygons: %d / %d\n" , (int)( mesh->outOfCorePointCount()+mesh->inCorePoints.size() ) , (int)mesh->polygonCount() );	// Cast so the arguments match %d
+	std::string isoStatsString = isoStats.toString() + std::string( "\n" );
+	messageWriter( "%s" , isoStatsString.c_str() );	// Never pass runtime text as the format string
+	if( PolygonMesh.set ) profiler.dumpOutput2( comments , "#         Got polygons:" );
+	else                  profiler.dumpOutput2( comments , "#        Got triangles:" );
+	// Write the mesh; an empty comment list is passed instead of comments when --noComments is set
+	std::vector< std::string > noComments;
+	if( !PlyWritePolygons< Vertex , Real , Dim >( Out.value , mesh , ASCII.set ? PLY_ASCII : PLY_BINARY_NATIVE , NoComments.set ? noComments : comments , iXForm ) )
+		ERROR_OUT( "Could not write mesh to: %s" , Out.value );
+	// NOTE(review): relies on CoredMeshData having a virtual destructor -- confirm
+	delete mesh;
+}
+
+template< typename Real , unsigned int Dim >	// Writes a res^Dim grid of values either as a grayscale image ( Dim==2 ) or as a raw binary file
+void WriteGrid( ConstPointer( Real ) values , int res , const char *fileName )
+{
+	int resolution = 1;
+	for( int d=0 ; d<Dim ; d++ ) resolution *= res;	// resolution = res^Dim; NOTE(review): held in an int , so large grids may overflow
+	// ext is allocated by GetFileExtension and released with delete[] at the end of the function
+	char *ext = GetFileExtension( fileName );
+	// Image path: only for 2D grids whose file extension ImageWriter accepts
+	if( Dim==2 && ImageWriter::ValidExtension( ext ) )
+	{
+		Real avg = 0;
+#pragma omp parallel for reduction( + : avg )
+		for( int i=0 ; i<resolution ; i++ ) avg += values[i];
+		avg /= (Real)resolution;
+		// Standard deviation of the values about avg
+		Real std = 0;
+#pragma omp parallel for reduction( + : std )
+		for( int i=0 ; i<resolution ; i++ ) std += ( values[i] - avg ) * ( values[i] - avg );
+		std = (Real)sqrt( std / resolution );
+		// Values within two standard deviations of the mean are spread over the gray range
+		if( Verbose.set ) printf( "Grid to image: [%.2f,%.2f] -> [0,255]\n" , avg - 2*std , avg + 2*std );
+		// Three identical channels per pixel
+		unsigned char *pixels = new unsigned char[ resolution*3 ];
+#pragma omp parallel for
+		for( int i=0 ; i<resolution ; i++ )
+		{
+			Real v = (Real)std::min< Real >( (Real)1. , std::max< Real >( (Real)-1. , ( values[i] - avg ) / (2*std ) ) );	// Clamp the normalized value to [-1,1]
+			v = (Real)( ( v + 1. ) / 2. * 256. );	// Map [-1,1] to [0,256]
+			unsigned char color = (unsigned char )std::min< Real >( (Real)255. , std::max< Real >( (Real)0. , v ) );	// Clamp to [0,255] before narrowing
+			for( int c=0 ; c<3 ; c++ ) pixels[i*3+c ] = color;
+		}
+		ImageWriter::Write( fileName , pixels , res , res , 3 );
+		delete[] pixels;
+	}
+	else
+	{
+		// Binary path: the int res followed by res^Dim floats; NOTE(review): fwrite results are not checked
+		FILE *fp = fopen( fileName , "wb" );
+		if( !fp ) ERROR_OUT( "Failed to open grid file for writing: %s" , fileName );
+		else
+		{
+			fwrite( &res , sizeof(int) , 1 , fp );
+			if( typeid(Real)==typeid(float) ) fwrite( values , sizeof(float) , resolution , fp );	// Already floats: write the buffer directly
+			else
+			{
+				float *fValues = new float[resolution];	// Temporary float copy of the values
+				for( int i=0 ; i<resolution ; i++ ) fValues[i] = float( values[i] );
+				fwrite( fValues , sizeof(float) , resolution , fp );
+				delete[] fValues;
+			}
+			fclose( fp );
+		}
+	}
+	delete[] ext;
+}
+
+
+template< class Real , typename ... SampleData , unsigned int ... FEMSigs >
+void Execute( int argc , char* argv[] , UIntPack< FEMSigs ... > )
+{
+	static const int Dim = sizeof ... ( FEMSigs );
+	typedef UIntPack< FEMSigs ... > Sigs;
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > Degrees;
+	typedef UIntPack< FEMDegreeAndBType< NORMAL_DEGREE , DerivativeBoundary< FEMSignature< FEMSigs >::BType , 1 >::BType >::Signature ... > NormalSigs;
+	static const unsigned int DataSig = FEMDegreeAndBType< DATA_DEGREE , BOUNDARY_FREE >::Signature;
+	typedef typename FEMTree< Dim , Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
+	typedef typename FEMTree< Dim , Real >::template InterpolationInfo< Real , 0 > InterpolationInfo;
+	typedef PointStreamNormal< Real , Dim > NormalPointSampleData;
+	typedef MultiPointStreamData< Real , SampleData ... > AdditionalPointSampleData;
+	typedef MultiPointStreamData< Real , NormalPointSampleData , AdditionalPointSampleData > TotalPointSampleData;
+	typedef InputPointStreamWithData< Real , Dim , TotalPointSampleData > InputPointStream;
+	typedef TransformedInputPointStreamWithData< Real , Dim , TotalPointSampleData > XInputPointStream;
+	std::vector< std::string > comments;
+	messageWriter( comments , "*************************************************************\n" );
+	messageWriter( comments , "*************************************************************\n" );
+	messageWriter( comments , "** Running Screened Poisson Reconstruction (Version %s) **\n" , VERSION );
+	messageWriter( comments , "*************************************************************\n" );
+	messageWriter( comments , "*************************************************************\n" );
+
+	XForm< Real , Dim+1 > xForm , iXForm;
+	if( Transform.set )
+	{
+		FILE* fp = fopen( Transform.value , "r" );
 		if( !fp )
 		{
-			fprintf( stderr , "[WARNING] Could not read x-form from: %s\n" , XForm.value );
-			xForm = XForm4x4< Real >::Identity();
+			WARN( "Could not read x-form from: %s" , Transform.value );
+			xForm = XForm< Real , Dim+1 >::Identity();
 		}
 		else
 		{
-			for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ; j++ )
+			for( int i=0 ; i<Dim+1 ; i++ ) for( int j=0 ; j<Dim+1 ; j++ )
 			{
 				float f;
-				if( fscanf( fp , " %f " , &f )!=1 ) fprintf( stderr , "[ERROR] Execute: Failed to read xform\n" ) , exit( 0 );
+				if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read xform" );
 				xForm(i,j) = (Real)f;
 			}
 			fclose( fp );
 		}
 	}
-	else xForm = XForm4x4< Real >::Identity();
+	else xForm = XForm< Real , Dim+1 >::Identity();
 
-	DumpOutput2( comments , "Running Screened Poisson Reconstruction (Version 9.011)\n" );
 	char str[1024];
-	for( int i=0 ; i<paramNum ; i++ )
+	for( int i=0 ; params[i] ; i++ )
 		if( params[i]->set )
 		{
 			params[i]->writeValue( str );
-			if( strlen( str ) ) DumpOutput2( comments , "\t--%s %s\n" , params[i]->name , str );
-			else                DumpOutput2( comments , "\t--%s\n" , params[i]->name );
+			if( strlen( str ) ) messageWriter( comments , "\t--%s %s\n" , params[i]->name , str );
+			else                messageWriter( comments , "\t--%s\n" , params[i]->name );
 		}
 
 	double startTime = Time();
 	Real isoValue = 0;
 
-	Octree< Real > tree;
-	OctreeProfiler< Real > profiler( tree );
-	tree.threads = Threads.value;
-	if( !In.set )
-	{
-		ShowUsage( argv[0] );
-		return 0;
-	}
-	if( !MaxSolveDepth.set ) MaxSolveDepth.value = Depth.value;
-	
-	OctNode< TreeNodeData >::SetAllocator( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	FEMTree< Dim , Real > tree( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	FEMTreeProfiler< Dim , Real > profiler( tree );
 
-	int kernelDepth = KernelDepth.set ? KernelDepth.value : Depth.value-2;
-	if( kernelDepth>Depth.value )
+	if( Depth.set && Width.value>0 )
 	{
-		fprintf( stderr,"[WARNING] %s can't be greater than %s: %d <= %d\n" , KernelDepth.name , Depth.name , KernelDepth.value , Depth.value );
-		kernelDepth = Depth.value;
+		WARN( "Both --%s and --%s set, ignoring --%s" , Depth.name , Width.name , Width.name );
+		Width.value = 0;
 	}
 
 	int pointCount;
 
 	Real pointWeightSum;
-	std::vector< typename Octree< Real >::PointSample >* samples = new std::vector< typename Octree< Real >::PointSample >();
-	std::vector< ProjectiveData< Point3D< Real > , Real > >* sampleData = NULL;
+	std::vector< typename FEMTree< Dim , Real >::PointSample >* samples = new std::vector< typename FEMTree< Dim , Real >::PointSample >();
+	std::vector< TotalPointSampleData >* sampleData = NULL;
 	DensityEstimator* density = NULL;
-	SparseNodeData< Point3D< Real > , NORMAL_DEGREE >* normalInfo = NULL;
+	SparseNodeData< Point< Real , Dim > , NormalSigs >* normalInfo = NULL;
 	Real targetValue = (Real)0.5;
+
 	// Read in the samples (and color data)
 	{
 		profiler.start();
-		PointStream* pointStream;
+		InputPointStream* pointStream;
 		char* ext = GetFileExtension( In.value );
-		if( Color.set && Color.value>0 )
+		sampleData = new std::vector< TotalPointSampleData >();
+		std::vector< std::pair< Point< Real , Dim > , TotalPointSampleData > > inCorePoints;
+		if( InCore.set )
 		{
-			sampleData = new std::vector< ProjectiveData< Point3D< Real > , Real > >();
-			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryOrientedPointStreamWithData< Real , Point3D< Real > , float , Point3D< unsigned char > >( In.value );
-			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYOrientedPointStreamWithData< Real , Point3D< Real > >( In.value , ColorInfo< Real >::PlyProperties , 6 , ColorInfo< Real >::ValidPlyProperties );
-			else                                    pointStream = new  ASCIIOrientedPointStreamWithData< Real , Point3D< Real > >( In.value , ColorInfo< Real >::ReadASCII );
+			InputPointStream *_pointStream;
+			if     ( !strcasecmp( ext , "bnpts" ) ) _pointStream = new BinaryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadBinary );
+			else if( !strcasecmp( ext , "ply"   ) ) _pointStream = new    PLYInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::PlyReadProperties() , TotalPointSampleData::PlyReadNum , TotalPointSampleData::ValidPlyReadProperties );
+			else                                    _pointStream = new  ASCIIInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadASCII );
+			Point< Real , Dim > p;
+			TotalPointSampleData d;
+			while( _pointStream->nextPoint( p , d ) ) inCorePoints.push_back( std::pair< Point< Real , Dim > , TotalPointSampleData >( p , d ) );
+			delete _pointStream;
+
+			pointStream = new MemoryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( inCorePoints.size() , &inCorePoints[0] );
 		}
 		else
 		{
-			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryOrientedPointStream< Real , float >( In.value );
-			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYOrientedPointStream< Real >( In.value );
-			else                                    pointStream = new  ASCIIOrientedPointStream< Real >( In.value );
+			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadBinary );
+			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::PlyReadProperties() , TotalPointSampleData::PlyReadNum , TotalPointSampleData::ValidPlyReadProperties );
+			else                                    pointStream = new  ASCIIInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadASCII );
 		}
 		delete[] ext;
-		XPointStream _pointStream( xForm , *pointStream );
-		xForm = GetPointXForm( _pointStream , (Real)Scale.value ) * xForm;
-		if( sampleData )
+		typename TotalPointSampleData::Transform _xForm( xForm );
+		XInputPointStream _pointStream( [&]( Point< Real , Dim >& p , TotalPointSampleData& d ){ p = xForm*p , d = _xForm(d); } , *pointStream );
+		if( Width.value>0 ) xForm = GetPointXForm< Real , Dim >( _pointStream , Width.value , (Real)( Scale.value>0 ? Scale.value : 1. ) , Depth.value ) * xForm;
+		else                xForm = Scale.value>0 ? GetPointXForm< Real , Dim >( _pointStream , (Real)Scale.value ) * xForm : xForm;
 		{
-			XPointStreamWithData _pointStream( xForm , ( PointStreamWithData& )*pointStream );
-			pointCount = tree.template init< Point3D< Real > >( _pointStream , Depth.value , Confidence.set , *samples , sampleData );
-		}
-		else
-		{
-			XPointStream _pointStream( xForm , *pointStream );
-			pointCount = tree.template init< Point3D< Real > >( _pointStream , Depth.value , Confidence.set , *samples , sampleData );
+			typename TotalPointSampleData::Transform _xForm( xForm );
+			XInputPointStream _pointStream( [&]( Point< Real , Dim >& p , TotalPointSampleData& d ){ p = xForm*p , d = _xForm(d); } , *pointStream );
+			auto ProcessDataWithConfidence = [&]( const Point< Real , Dim >& p , TotalPointSampleData& d )
+			{
+				Real l = (Real)Length( std::get< 0 >( d.data ).data );
+				if( !l || l!=l ) return (Real)-1.;
+				return (Real)pow( l , Confidence.value );
+			};
+			auto ProcessData = []( const Point< Real , Dim >& p , TotalPointSampleData& d )
+			{
+				Real l = (Real)Length( std::get< 0 >( d.data ).data );
+				if( !l || l!=l ) return (Real)-1.;
+				std::get< 0 >( d.data ).data /= l;
+				return (Real)1.;
+			};
+			if( Confidence.value>0 ) pointCount = FEMTreeInitializer< Dim , Real >::template Initialize< TotalPointSampleData >( tree.spaceRoot() , _pointStream , Depth.value , *samples , *sampleData , true , tree.nodeAllocator , tree.initializer() , ProcessDataWithConfidence );
+			else                     pointCount = FEMTreeInitializer< Dim , Real >::template Initialize< TotalPointSampleData >( tree.spaceRoot() , _pointStream , Depth.value , *samples , *sampleData , true , tree.nodeAllocator , tree.initializer() , ProcessData );
 		}
 		iXForm = xForm.inverse();
 		delete pointStream;
-#pragma omp parallel for num_threads( Threads.value )
-		for( int i=0 ; i<(int)samples->size() ; i++ ) (*samples)[i].sample.data.n *= (Real)-1;
 
-		DumpOutput( "Input Points / Samples: %d / %d\n" , pointCount , samples->size() );
+		messageWriter( "Input Points / Samples: %d / %d\n" , pointCount , samples->size() );
 		profiler.dumpOutput2( comments , "# Read input into tree:" );
 	}
-	DenseNodeData< Real , Degree > solution;
+	int kernelDepth = KernelDepth.set ? KernelDepth.value : Depth.value-2;
+	if( kernelDepth>Depth.value )
+	{
+		WARN( "%s can't be greater than %s: %d <= %d" , KernelDepth.name , Depth.name , KernelDepth.value , Depth.value );
+		kernelDepth = Depth.value;
+	}
 
+	DenseNodeData< Real , Sigs > solution;
 	{
-		DenseNodeData< Real , Degree > constraints;
+		DenseNodeData< Real , Sigs > constraints;
 		InterpolationInfo* iInfo = NULL;
-		int solveDepth = MaxSolveDepth.value;
+		int solveDepth = Depth.value;
 
 		tree.resetNodeIndices();
 
-		// Get the kernel density estimator [If discarding, compute anew. Otherwise, compute once.]
+		// Get the kernel density estimator
 		{
 			profiler.start();
-			density = tree.template setDensityEstimator< WEIGHT_DEGREE >( *samples , kernelDepth , SamplesPerNode.value );
+			density = tree.template setDensityEstimator< WEIGHT_DEGREE >( *samples , kernelDepth , SamplesPerNode.value , 1 );
 			profiler.dumpOutput2( comments , "#   Got kernel density:" );
 		}
 
-		// Transform the Hermite samples into a vector field [If discarding, compute anew. Otherwise, compute once.]
+		// Transform the Hermite samples into a vector field
 		{
 			profiler.start();
-			normalInfo = new SparseNodeData< Point3D< Real > , NORMAL_DEGREE >();
-			*normalInfo = tree.template setNormalField< NORMAL_DEGREE >( *samples , *density , pointWeightSum , BType==BOUNDARY_NEUMANN );
+			normalInfo = new SparseNodeData< Point< Real , Dim > , NormalSigs >();
+			if( ConfidenceBias.value>0 ) *normalInfo = tree.setNormalField( NormalSigs() , *samples , *sampleData , density , pointWeightSum , [&]( Real conf ){ return (Real)( log( conf ) * ConfidenceBias.value / log( 1<<(Dim-1) ) ); } );
+			else                         *normalInfo = tree.setNormalField( NormalSigs() , *samples , *sampleData , density , pointWeightSum );
+#pragma omp parallel for
+			for( int i=0 ; i<normalInfo->size() ; i++ ) (*normalInfo)[i] *= (Real)-1.;
 			profiler.dumpOutput2( comments , "#     Got normal field:" );
+			messageWriter( "Point weight / Estimated Area: %g / %g\n" , pointWeightSum , pointCount*pointWeightSum );
 		}
 
 		if( !Density.set ) delete density , density = NULL;
+		if( DataX.value<=0 || ( !Colors.set && !Normals.set ) ) delete sampleData , sampleData = NULL;
 
 		// Trim the tree and prepare for multigrid
 		{
 			profiler.start();
-			std::vector< int > indexMap;
-
-			constexpr int MAX_DEGREE = NORMAL_DEGREE > Degree ? NORMAL_DEGREE : Degree;
-			tree.template inalizeForBroodedMultigrid< MAX_DEGREE , Degree , BType >( FullDepth.value , typename Octree< Real >::template HasNormalDataFunctor< NORMAL_DEGREE >( *normalInfo ) , &indexMap );
-
-			if( normalInfo ) normalInfo->remapIndices( indexMap );
-			if( density ) density->remapIndices( indexMap );
+			constexpr int MAX_DEGREE = NORMAL_DEGREE > Degrees::Max() ? NORMAL_DEGREE : Degrees::Max();
+			tree.template finalizeForMultigrid< MAX_DEGREE >( FullDepth.value , typename FEMTree< Dim , Real >::template HasNormalDataFunctor< NormalSigs >( *normalInfo ) , normalInfo , density );
 			profiler.dumpOutput2( comments , "#       Finalized tree:" );
 		}
-
 		// Add the FEM constraints
 		{
 			profiler.start();
-			constraints = tree.template initDenseNodeData< Degree >( );
-			tree.template addFEMConstraints< Degree , BType , NORMAL_DEGREE , BType >( FEMVFConstraintFunctor< NORMAL_DEGREE , BType , Degree , BType >( 1. , 0. ) , *normalInfo , constraints , solveDepth );
+			constraints = tree.initDenseNodeData( Sigs() );
+			typename FEMIntegrator::template Constraint< Sigs , IsotropicUIntPack< Dim , 1 > , NormalSigs , IsotropicUIntPack< Dim , 0 > , Dim > F;
+			unsigned int derivatives2[Dim];
+			for( int d=0 ; d<Dim ; d++ ) derivatives2[d] = 0;
+			typedef IsotropicUIntPack< Dim , 1 > Derivatives1;
+			typedef IsotropicUIntPack< Dim , 0 > Derivatives2;
+			for( int d=0 ; d<Dim ; d++ )
+			{
+				unsigned int derivatives1[Dim];
+				for( int dd=0 ; dd<Dim ; dd++ ) derivatives1[dd] = dd==d ?  1 : 0;
+				F.weights[d][ TensorDerivatives< Derivatives1 >::Index( derivatives1 ) ][ TensorDerivatives< Derivatives2 >::Index( derivatives2 ) ] = 1;
+			}
+			tree.addFEMConstraints( F , *normalInfo , constraints , solveDepth );
 			profiler.dumpOutput2( comments , "#  Set FEM constraints:" );
 		}
 
-		// Free up the normal info [If we don't need it for subseequent iterations.]
+		// Free up the normal info
 		delete normalInfo , normalInfo = NULL;
 
 		// Add the interpolation constraints
 		if( PointWeight.value>0 )
 		{
 			profiler.start();
-			iInfo = new InterpolationInfo( tree , *samples , targetValue , AdaptiveExponent.value , (Real)PointWeight.value * pointWeightSum , (Real)0 );
-			tree.template addInterpolationConstraints< Degree , BType >( *iInfo , constraints , solveDepth );
+			if( ExactInterpolation.set ) iInfo = FEMTree< Dim , Real >::template       InitializeExactPointInterpolationInfo< Real , 0 > ( tree , *samples , ConstraintDual< Dim , Real >( targetValue , (Real)PointWeight.value * pointWeightSum ) , SystemDual< Dim , Real >( (Real)PointWeight.value * pointWeightSum ) , true , false );
+			else                         iInfo = FEMTree< Dim , Real >::template InitializeApproximatePointInterpolationInfo< Real , 0 > ( tree , *samples , ConstraintDual< Dim , Real >( targetValue , (Real)PointWeight.value * pointWeightSum ) , SystemDual< Dim , Real >( (Real)PointWeight.value * pointWeightSum ) , true , 1 );
+			tree.addInterpolationConstraints( constraints , solveDepth , *iInfo );
 			profiler.dumpOutput2( comments , "#Set point constraints:" );
 		}
 
-		DumpOutput( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
-		DumpOutput( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage())/(1<<20) );
-
+		messageWriter( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
+		messageWriter( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage())/(1<<20) );
+		
 		// Solve the linear system
 		{
 			profiler.start();
-			typename Octree< Real >::SolverInfo solverInfo;
-			solverInfo.cgDepth = CGDepth.value , solverInfo.iters = Iters.value , solverInfo.cgAccuracy = CGSolverAccuracy.value , solverInfo.verbose = Verbose.set , solverInfo.showResidual = ShowResidual.set , solverInfo.lowResIterMultiplier = std::max< double >( 1. , LowResIterMultiplier.value );
-			solution = tree.template solveSystem< Degree , BType >( FEMSystemFunctor< Degree , BType >( 0 , 1. , 0 ) , iInfo , constraints , solveDepth , solverInfo );
+			typename FEMTree< Dim , Real >::SolverInfo sInfo;
+			sInfo.cgDepth = 0 , sInfo.cascadic = true , sInfo.vCycles = 1 , sInfo.iters = Iters.value , sInfo.cgAccuracy = CGSolverAccuracy.value , sInfo.verbose = Verbose.set , sInfo.showResidual = ShowResidual.set , sInfo.showGlobalResidual = SHOW_GLOBAL_RESIDUAL_NONE , sInfo.sliceBlockSize = 1;
+			sInfo.baseDepth = BaseDepth.value , sInfo.baseVCycles = BaseVCycles.value;
+			typename FEMIntegrator::template System< Sigs , IsotropicUIntPack< Dim , 1 > > F( { 0. , 1. } );
+			solution = tree.solveSystem( Sigs() , F , constraints , solveDepth , sInfo , iInfo );
 			profiler.dumpOutput2( comments , "# Linear system solved:" );
 			if( iInfo ) delete iInfo , iInfo = NULL;
 		}
 	}
 
-	char tempHeader[1024];
-	{
-#if defined( _WIN32 ) || defined( _WIN64 )
-		const char FileSeparator = '\\';
-#else // !_WIN
-		const char FileSeparator = '/';
-#endif // _WIN
-		char tempPath[1024];
-		tempPath[0] = 0;
-		if( TempDir.set ) strcpy( tempPath , TempDir.value );
-		else
-		{
-#if defined( _WIN32 ) || defined( _WIN64 )
-			GetTempPath( sizeof(tempPath) , tempPath );
-#else // !_WIN
-			if( std::getenv( "TMPDIR" ) ) strcpy( tempPath , std::getenv( "TMPDIR" ) );
-#endif // _WIN
-		}
-		if( strlen(tempPath)==0 ) sprintf( tempPath , ".%c" , FileSeparator );
-		if( tempPath[ strlen( tempPath )-1 ]==FileSeparator ) sprintf( tempHeader , "%sPR_" , tempPath );
-		else                                                  sprintf( tempHeader , "%s%cPR_" , tempPath , FileSeparator );
-	}
-	CoredFileMeshData< Vertex > mesh( tempHeader );
-
 	{
 		profiler.start();
 		double valueSum = 0 , weightSum = 0;
-		typename Octree< Real >::template MultiThreadedEvaluator< Degree , BType > evaluator( &tree , solution , Threads.value );
-#pragma omp parallel for num_threads( Threads.value ) reduction( + : valueSum , weightSum )
+		typename FEMTree< Dim , Real >::template MultiThreadedEvaluator< Sigs , 0 > evaluator( &tree , solution );
+#pragma omp parallel for reduction( + : valueSum , weightSum )
 		for( int j=0 ; j<samples->size() ; j++ )
 		{
-			ProjectiveData< OrientedPoint3D< Real > , Real >& sample = (*samples)[j].sample;
+			ProjectiveData< Point< Real , Dim > , Real >& sample = (*samples)[j].sample;
 			Real w = sample.weight;
-			if( w>0 ) weightSum += w , valueSum += evaluator.value( sample.data.p / sample.weight , omp_get_thread_num() , (*samples)[j].node ) * w;
+			if( w>0 ) weightSum += w , valueSum += evaluator.values( sample.data / sample.weight , omp_get_thread_num() , (*samples)[j].node )[0] * w;
 		}
 		isoValue = (Real)( valueSum / weightSum );
-		if( !( Color.set && Color.value>0 ) && samples ) delete samples , samples = NULL;
+		if( DataX.value<=0 || ( !Colors.set && !Normals.set ) ) delete samples , samples = NULL;
 		profiler.dumpOutput( "Got average:" );
-		DumpOutput( "Iso-Value: %e\n" , isoValue );
+		messageWriter( "Iso-Value: %e = %g / %g\n" , isoValue , valueSum , weightSum );
+	}
+	if( Tree.set )
+	{
+		FILE* fp = fopen( Tree.value , "wb" );
+		if( !fp ) ERROR_OUT( "Failed to open file for writing: %s" , Tree.value );
+		FEMTree< Dim , Real >::WriteParameter( fp );
+		DenseNodeData< Real , Sigs >::WriteSignatures( fp );
+		tree.write( fp );
+		solution.write( fp );
+		fclose( fp );
 	}
 
-	if( VoxelGrid.set )
+	if( Grid.set )
 	{
+		int res = 0;
 		profiler.start();
-		FILE* fp = fopen( VoxelGrid.value , "wb" );
-		if( !fp ) fprintf( stderr , "Failed to open voxel file for writing: %s\n" , VoxelGrid.value );
-		else
+		Pointer( Real ) values = tree.template regularGridEvaluate< true >( solution , res , -1 , PrimalGrid.set );
+		int resolution = 1;
+		for( int d=0 ; d<Dim ; d++ ) resolution *= res;
+#pragma omp parallel for
+		for( int i=0 ; i<resolution ; i++ ) values[i] -= isoValue;
+		profiler.dumpOutput( "Got grid:" );
+		WriteGrid< Real , DIMENSION >( values , res , Grid.value );
+		DeletePointer( values );
+		if( Verbose.set )
 		{
-			int res = 0;
-			Pointer( Real ) values = tree.template voxelEvaluate< Real , Degree , BType >( solution , res , isoValue , VoxelDepth.value , PrimalVoxel.set );
-			fwrite( &res , sizeof(int) , 1 , fp );
-			if( sizeof(Real)==sizeof(float) ) fwrite( values , sizeof(float) , res*res*res , fp );
-			else
+			printf( "Transform:\n" );
+			for( int i=0 ; i<Dim+1 ; i++ )
 			{
-				float *fValues = new float[res*res*res];
-				for( int i=0 ; i<res*res*res ; i++ ) fValues[i] = float( values[i] );
-				fwrite( fValues , sizeof(float) , res*res*res , fp );
-				delete[] fValues;
+				printf( "\t" );
+				for( int j=0 ; j<Dim+1 ; j++ ) printf( " %f" , iXForm(j,i) );
+				printf( "\n" );
 			}
-			fclose( fp );
-			DeletePointer( values );
 		}
-		profiler.dumpOutput( "Got voxel grid:" );
 	}
 
 	if( Out.set )
 	{
-		profiler.start();
-		SparseNodeData< ProjectiveData< Point3D< Real > , Real > , DATA_DEGREE >* colorData = NULL;
-		if( sampleData )
+		if( Normals.set )
 		{
-			colorData = new SparseNodeData< ProjectiveData< Point3D< Real > , Real > , DATA_DEGREE >();
-			*colorData = tree.template setDataField< DATA_DEGREE , false >( *samples , *sampleData , (DensityEstimator*)NULL );
-			delete sampleData , sampleData = NULL;
-			for( const OctNode< TreeNodeData >* n = tree.tree().nextNode() ; n ; n=tree.tree().nextNode( n ) )
+			if( Density.set )
 			{
-				ProjectiveData< Point3D< Real > , Real >* clr = (*colorData)( n );
-				if( clr ) (*clr) *= (Real)pow( Color.value , tree.depth( n ) );
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamNormal< Real , Dim > , PointStreamValue< Real > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 0 >( d.data ) , std::get< 1 >( v.data.data ).data = w , std::get< 2 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
+			else
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamNormal< Real , Dim > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 0 >( d.data ) , std::get< 1 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
 			}
-		}
-		tree.template getMCIsoSurface< Degree , BType , WEIGHT_DEGREE , DATA_DEGREE >( density , colorData , solution , isoValue , mesh , !LinearFit.set , !NonManifold.set , PolygonMesh.set );
-		DumpOutput( "Vertices / Polygons: %d / %d\n" , mesh.outOfCorePointCount()+mesh.inCorePoints.size() , mesh.polygonCount() );
-		if( PolygonMesh.set ) profiler.dumpOutput2( comments , "#         Got polygons:" );
-		else                  profiler.dumpOutput2( comments , "#        Got triangles:" );
-
-		if( colorData ) delete colorData , colorData = NULL;
-
-		if( NoComments.set )
-		{
-			if( ASCII.set ) PlyWritePolygons( Out.value , &mesh , PLY_ASCII         , NULL , 0 , iXForm );
-			else            PlyWritePolygons( Out.value , &mesh , PLY_BINARY_NATIVE , NULL , 0 , iXForm );
 		}
 		else
 		{
-			if( ASCII.set ) PlyWritePolygons( Out.value , &mesh , PLY_ASCII         , &comments[0] , (int)comments.size() , iXForm );
-			else            PlyWritePolygons( Out.value , &mesh , PLY_BINARY_NATIVE , &comments[0] , (int)comments.size() , iXForm );
+			if( Density.set )
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamValue< Real > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ).data = w , std::get< 1 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
+			else
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
 		}
+		if( sampleData ){ delete sampleData ; sampleData = NULL; }
 	}
 	if( density ) delete density , density = NULL;
-	DumpOutput2( comments , "#          Total Solve: %9.1f (s), %9.1f (MB)\n" , Time()-startTime , tree.maxMemoryUsage() );
-
-	return 1;
+	messageWriter( comments , "#          Total Solve: %9.1f (s), %9.1f (MB)\n" , Time()-startTime , FEMTree< Dim , Real >::MaxMemoryUsage() );
 }
 
-#if defined( _WIN32 ) || defined( _WIN64 )
-inline double to_seconds( const FILETIME& ft )
-{
-	const double low_to_sec=100e-9; // 100 nanoseconds
-	const double high_to_sec=low_to_sec*4294967296.0;
-	return ft.dwLowDateTime*low_to_sec+ft.dwHighDateTime*high_to_sec;
-}
-#endif // _WIN32 || _WIN64
-
 #ifndef FAST_COMPILE
-template< class Real , class Vertex >
-int Execute( int argc , char* argv[] )
+template< unsigned int Dim , class Real , typename ... SampleData >
+void Execute( int argc , char* argv[] )
 {
 	switch( BType.value )
 	{
@@ -703,104 +730,87 @@ int Execute( int argc , char* argv[] )
 		{
 			switch( Degree.value )
 			{
-			case 1: return _Execute< Real , 1 , BOUNDARY_FREE , Vertex >( argc , argv );
-			case 2: return _Execute< Real , 2 , BOUNDARY_FREE , Vertex >( argc , argv );
-			case 3: return _Execute< Real , 3 , BOUNDARY_FREE , Vertex >( argc , argv );
-			case 4: return _Execute< Real , 4 , BOUNDARY_FREE , Vertex >( argc , argv );
-			default: fprintf( stderr , "[ERROR] Only B-Splines of degree 1 - 4 are supported" ) ; return EXIT_FAILURE;
+				case 1: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 1 , BOUNDARY_FREE >::Signature >() );
+				case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_FREE >::Signature >() );
+//				case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_FREE >::Signature >() );
+//				case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_FREE >::Signature >() );
+				default: ERROR_OUT( "Only B-Splines of degree 1 - 2 are supported" );
 			}
 		}
 		case BOUNDARY_NEUMANN+1:
 		{
 			switch( Degree.value )
 			{
-			case 1: return _Execute< Real , 1 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-			case 2: return _Execute< Real , 2 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-			case 3: return _Execute< Real , 3 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-			case 4: return _Execute< Real , 4 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-			default: fprintf( stderr , "[ERROR] Only B-Splines of degree 1 - 4 are supported" ) ; return EXIT_FAILURE;
+				case 1: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 1 , BOUNDARY_NEUMANN >::Signature >() );
+				case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_NEUMANN >::Signature >() );
+//				case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_NEUMANN >::Signature >() );
+//				case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_NEUMANN >::Signature >() );
+				default: ERROR_OUT( "Only B-Splines of degree 1 - 2 are supported" );
 			}
 		}
 		case BOUNDARY_DIRICHLET+1:
 		{
 			switch( Degree.value )
 			{
-			case 1: return _Execute< Real , 1 , BOUNDARY_DIRICHLET , Vertex >( argc , argv );
-			case 2: return _Execute< Real , 2 , BOUNDARY_DIRICHLET , Vertex >( argc , argv );
-			case 3: return _Execute< Real , 3 , BOUNDARY_DIRICHLET , Vertex >( argc , argv );
-			case 4: return _Execute< Real , 4 , BOUNDARY_DIRICHLET , Vertex >( argc , argv );
-			default: fprintf( stderr , "[ERROR] Only B-Splines of degree 1 - 4 are supported" ) ; return EXIT_FAILURE;
+			case 1: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 1 , BOUNDARY_DIRICHLET >::Signature >() );
+			case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_DIRICHLET >::Signature >() );
+//			case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_DIRICHLET >::Signature >() );
+//			case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_DIRICHLET >::Signature >() );
+			default: ERROR_OUT( "Only B-Splines of degree 1 - 2 are supported" );
 			}
 		}
-		default: fprintf( stderr , "[ERROR] Not a valid boundary type: %d\n" , BType.value ) ; return EXIT_FAILURE;
+		default: ERROR_OUT( "Not a valid boundary type: %d" , BType.value );
 	}
 }
 #endif // !FAST_COMPILE
+
 int main( int argc , char* argv[] )
 {
+	Timer timer;
 #ifdef ARRAY_DEBUG
-	fprintf( stderr , "[WARNING] Running in array debugging mode\n" );
+	WARN( "Array debugging enabled" );
 #endif // ARRAY_DEBUG
-#if defined( WIN32 ) && defined( MAX_MEMORY_GB )
-	if( MAX_MEMORY_GB>0 )
+
+	cmdLineParse( argc-1 , &argv[1] , params );
+	if( MaxMemoryGB.value>0 ) SetPeakMemoryMB( MaxMemoryGB.value<<10 );
+	omp_set_num_threads( Threads.value > 1 ? Threads.value : 1 );
+	messageWriter.echoSTDOUT = Verbose.set;
+
+	if( !In.set )
 	{
-		SIZE_T peakMemory = 1;
-		peakMemory <<= 30;
-		peakMemory *= MAX_MEMORY_GB;
-		printf( "Limiting memory usage to %.2f GB\n" , float( peakMemory>>30 ) );
-		HANDLE h = CreateJobObject( NULL , NULL );
-		AssignProcessToJobObject( h , GetCurrentProcess() );
-
-		JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
-		jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_JOB_MEMORY;
-		jeli.JobMemoryLimit = peakMemory;
-		if( !SetInformationJobObject( h , JobObjectExtendedLimitInformation , &jeli , sizeof( jeli ) ) )
-			fprintf( stderr , "Failed to set memory limit\n" );
+		ShowUsage( argv[0] );
+		return 0;
 	}
-#endif // defined( WIN32 ) && defined( MAX_MEMORY_GB )
-	double t = Time();
+	if( DataX.value<=0 ) Normals.set = Colors.set = false;
+	if( BaseDepth.value>FullDepth.value )
+	{
+		if( BaseDepth.set ) WARN( "Base depth must be smaller than full depth: %d <= %d" , BaseDepth.value , FullDepth.value );
+		BaseDepth.value = FullDepth.value;
+	}
+
+#ifdef USE_DOUBLE
+	typedef double Real;
+#else // !USE_DOUBLE
+	typedef float  Real;
+#endif // USE_DOUBLE
 
-	cmdLineParse( argc-1 , &argv[1] , sizeof(params)/sizeof(cmdLineReadable*) , params , 1 );
 #ifdef FAST_COMPILE
-	static const int Degree = 2;
-	static const BoundaryType BType = BOUNDARY_NEUMANN;
-	fprintf( stderr , "[WARNING] Compiling for degree-%d, boundary-%s, single-precision _only_\n" , Degree , BoundaryNames[ BType ] );
-	if( Density.set )
-		if( Color.set && Color.value>0 ) return _Execute< float , Degree , BType , PlyColorAndValueVertex< float > >( argc , argv );
-		else                             return _Execute< float , Degree , BType , PlyValueVertex< float > >( argc , argv );
-	else
-		if( Color.set && Color.value>0 ) return _Execute< float , Degree , BType , PlyColorVertex< float > >( argc , argv );
-		else                             return _Execute< float , Degree , BType , PlyVertex< float > >( argc , argv );
+	static const int Degree = DEFAULT_FEM_DEGREE;
+	static const BoundaryType BType = DEFAULT_FEM_BOUNDARY;
+	typedef IsotropicUIntPack< DIMENSION , FEMDegreeAndBType< Degree , BType >::Signature > FEMSigs;
+	WARN( "Compiled for degree-%d, boundary-%s, %s-precision _only_" , Degree , BoundaryNames[ BType ] , sizeof(DefaultFloatType)==4 ? "single" : "double" );
+	if( !PointWeight.set ) PointWeight.value = DefaultPointWeightMultiplier*Degree;
+	if( Colors.set ) Execute< Real , PointStreamColor< DefaultFloatType > >( argc , argv , FEMSigs() );
+	else             Execute< Real >( argc , argv , FEMSigs() );
 #else // !FAST_COMPILE
-	{
-		if( Density.set )
-			if( Color.set && Color.value>0 )
-				if( Double.set ) Execute< double , PlyColorAndValueVertex< float > >( argc , argv );
-				else             Execute< float  , PlyColorAndValueVertex< float > >( argc , argv );
-			else
-				if( Double.set ) Execute< double , PlyValueVertex< float > >( argc , argv );
-				else             Execute< float  , PlyValueVertex< float > >( argc , argv );
-		else
-			if( Color.set && Color.value>0 )
-				if( Double.set ) Execute< double , PlyColorVertex< float > >( argc , argv );
-				else             Execute< float  , PlyColorVertex< float > >( argc , argv );
-			else
-				if( Double.set ) Execute< double , PlyVertex< float > >( argc , argv );
-				else             Execute< float  , PlyVertex< float > >( argc , argv );
-	}
+	if( !PointWeight.set ) PointWeight.value = DefaultPointWeightMultiplier*Degree.value;
+	if( Colors.set ) Execute< DIMENSION , Real , PointStreamColor< float > >( argc , argv );
+	else             Execute< DIMENSION , Real >( argc , argv );
 #endif // FAST_COMPILE
-#if defined( _WIN32 ) || defined( _WIN64 )
 	if( Performance.set )
 	{
-		HANDLE cur_thread=GetCurrentThread();
-		FILETIME tcreat, texit, tkernel, tuser;
-		if( GetThreadTimes( cur_thread , &tcreat , &texit , &tkernel , &tuser ) )
-			printf( "Time (Wall/User/Kernel): %.2f / %.2f / %.2f\n" , Time()-t , to_seconds( tuser ) , to_seconds( tkernel ) );
-		else printf( "Time: %.2f\n" , Time()-t );
-		HANDLE h = GetCurrentProcess();
-		PROCESS_MEMORY_COUNTERS pmc;
-		if( GetProcessMemoryInfo( h , &pmc , sizeof(pmc) ) ) printf( "Peak Memory (MB): %d\n" , (int)( pmc.PeakWorkingSetSize>>20 ) );
+		printf( "Time (Wall/CPU): %.2f / %.2f\n" , timer.wallTime() , timer.cpuTime() );
+		printf( "Peak Memory (MB): %d\n" , MemoryInfo::PeakMemoryUsageMB() );
 	}
-#endif // _WIN32 || _WIN64
 	return EXIT_SUCCESS;
 }
diff --git a/Src/Polynomial.h b/Src/Polynomial.h
index 397b7bd..58fc354 100644
--- a/Src/Polynomial.h
+++ b/Src/Polynomial.h
@@ -29,19 +29,14 @@ DAMAGE.
 #ifndef POLYNOMIAL_INCLUDED
 #define POLYNOMIAL_INCLUDED
 
-#define NEW_POLYNOMIAL_CODE 1
-
-#include <vector>
-
 template< int Degree >
 class Polynomial
 {
 public:
 	double coefficients[Degree+1];
 
-	Polynomial(void);
-	template<int Degree2>
-	Polynomial(const Polynomial<Degree2>& P);
+	Polynomial( void );
+	template< int Degree2 > Polynomial( const Polynomial< Degree2 >& P );
 	double operator()( double t ) const;
 	double integral( double tMin , double tMax ) const;
 
@@ -72,13 +67,20 @@ class Polynomial
 	Polynomial scale( double s ) const;
 	Polynomial shift( double t ) const;
 
-	Polynomial<Degree-1> derivative(void) const;
-	Polynomial<Degree+1> integral(void) const;
+	template< int _Degree=Degree >
+	typename std::enable_if< (_Degree==0) , Polynomial< Degree   > >::type derivative( void ) const { return Polynomial< Degree >(); }
+	template< int _Degree=Degree >
+	typename std::enable_if< (_Degree> 0) , Polynomial< Degree-1 > >::type derivative( void ) const
+	{
+		Polynomial< Degree-1 > p;
+		for( int i=0 ; i<Degree ; i++ ) p.coefficients[i] = coefficients[i+1]*(i+1);
+		return p;
+	}
+	Polynomial< Degree+1 > integral(void) const;
 
-	void printnl(void) const;
+	void printnl( void ) const;
 
 	Polynomial& addScaled(const Polynomial& p,double scale);
-
 	static void Negate(const Polynomial& in,Polynomial& out);
 	static void Subtract(const Polynomial& p1,const Polynomial& p2,Polynomial& q);
 	static void Scale(const Polynomial& p,double w,Polynomial& q);
@@ -86,7 +88,6 @@ class Polynomial
 	static void AddScaled(const Polynomial& p1,const Polynomial& p2,double w2,Polynomial& q);
 	static void AddScaled(const Polynomial& p1,double w1,const Polynomial& p2,Polynomial& q);
 
-	void getSolutions(double c,std::vector<double>& roots,double EPS) const;
 	int getSolutions( double c , double* roots , double EPS ) const;
 
 	// [NOTE] Both of these methods define the indexing according to DeBoor's algorithm, so that
diff --git a/Src/Polynomial.inl b/Src/Polynomial.inl
index ea7ae50..eb8c034 100644
--- a/Src/Polynomial.inl
+++ b/Src/Polynomial.inl
@@ -53,13 +53,6 @@ Polynomial<Degree>& Polynomial<Degree>::operator  = (const Polynomial<Degree2> &
 	return *this;
 }
 
-template<int Degree>
-Polynomial<Degree-1> Polynomial<Degree>::derivative(void) const{
-	Polynomial<Degree-1> p;
-	for(int i=0;i<Degree;i++){p.coefficients[i]=coefficients[i+1]*(i+1);}
-	return p;
-}
-
 template<int Degree>
 Polynomial<Degree+1> Polynomial<Degree>::integral(void) const{
 	Polynomial<Degree+1> p;
@@ -67,10 +60,10 @@ Polynomial<Degree+1> Polynomial<Degree>::integral(void) const{
 	for(int i=0;i<=Degree;i++){p.coefficients[i+1]=coefficients[i]/(i+1);}
 	return p;
 }
-template<> double Polynomial< 0 >::operator() ( double t ) const { return coefficients[0]; }
-template<> double Polynomial< 1 >::operator() ( double t ) const { return coefficients[0]+coefficients[1]*t; }
-template<> double Polynomial< 2 >::operator() ( double t ) const { return coefficients[0]+(coefficients[1]+coefficients[2]*t)*t; }
-template<int Degree>
+template< > double Polynomial< 0 >::operator() ( double t ) const { return coefficients[0]; }
+template< > double Polynomial< 1 >::operator() ( double t ) const { return coefficients[0]+coefficients[1]*t; }
+template< > double Polynomial< 2 >::operator() ( double t ) const { return coefficients[0]+(coefficients[1]+coefficients[2]*t)*t; }
+template< int Degree >
 double Polynomial<Degree>::operator() ( double t ) const{
 	double v=coefficients[Degree];
 	for( int d=Degree-1 ; d>=0 ; d-- ) v = v*t + coefficients[d];
@@ -261,38 +254,10 @@ void Polynomial<Degree>::printnl(void) const{
 	}
 	printf("\n");
 }
-template<int Degree>
-void Polynomial<Degree>::getSolutions(double c,std::vector<double>& roots,double EPS) const
-{
-	double r[4][2];
-	int rCount=0;
-	roots.clear();
-	switch(Degree){
-	case 1:
-		rCount=Factor(coefficients[1],coefficients[0]-c,r,EPS);
-		break;
-	case 2:
-		rCount=Factor(coefficients[2],coefficients[1],coefficients[0]-c,r,EPS);
-		break;
-	case 3:
-		rCount=Factor(coefficients[3],coefficients[2],coefficients[1],coefficients[0]-c,r,EPS);
-		break;
-//	case 4:
-//		rCount=Factor(coefficients[4],coefficients[3],coefficients[2],coefficients[1],coefficients[0]-c,r,EPS);
-//		break;
-	default:
-		printf("Can't solve polynomial of degree: %d\n",Degree);
-	}
-	for(int i=0;i<rCount;i++){
-		if(fabs(r[i][1])<=EPS){
-			roots.push_back(r[i][0]);
-		}
-	}
-}
 template< int Degree >
 int Polynomial<Degree>::getSolutions( double c , double* roots , double EPS ) const
 {
-	double _roots[4][2];
+	std::complex< double > _roots[4];
 	int _rCount=0;
 	switch( Degree )
 	{
@@ -303,7 +268,7 @@ int Polynomial<Degree>::getSolutions( double c , double* roots , double EPS ) co
 		default: printf( "Can't solve polynomial of degree: %d\n" , Degree );
 	}
 	int rCount = 0;
-	for( int i=0 ; i<_rCount ; i++ ) if( fabs(_roots[i][1])<=EPS ) roots[rCount++] = _roots[i][0];
+	for( int i=0 ; i<_rCount ; i++ ) if( fabs( _roots[i].imag() )<=EPS ) roots[rCount++] = _roots[i].real();
 	return rCount;
 }
 // The 0-th order B-spline
diff --git a/Src/RegularTree.h b/Src/RegularTree.h
new file mode 100644
index 0000000..5e1a1c2
--- /dev/null
+++ b/Src/RegularTree.h
@@ -0,0 +1,244 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#ifndef REGULAR_TREE_NODE_INCLUDED
+#define REGULAR_TREE_NODE_INCLUDED
+
+#include "Allocator.h"
+#include "BinaryNode.h"
+#include "Window.h"
+#include <functional>
+
+#ifdef USE_DEEP_TREE_NODES
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType=unsigned int >
+#else // !USE_DEEP_TREE_NODES
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType=unsigned short >
+#endif // USE_DEEP_TREE_NODES
+struct RegularTreeNode
+{
+private:
+	DepthAndOffsetType _depth , _offset[Dim];
+public:
+
+	RegularTreeNode* parent;
+	RegularTreeNode* children;
+	NodeData nodeData;
+
+	RegularTreeNode( std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+	static RegularTreeNode* NewBrood( Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+	static void ResetDepthAndOffset( RegularTreeNode* root , int d , int off[Dim] );
+	int initChildren( Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+
+	void cleanChildren( Allocator< RegularTreeNode >* nodeAllocator );
+	~RegularTreeNode( void );
+
+	// The merge functor takes two objects of type NodeData and returns an object of type NodeData
+	// [NOTE] We are assuming that the merge functor is symmetric, f(a,b) = f(b,a), and implicitly satisfies f(a) = a
+	template< class MergeFunctor >
+	void merge( RegularTreeNode* node , MergeFunctor& f );
+
+	void depthAndOffset( int& depth , int offset[Dim] ) const; 
+	void centerIndex( int index[Dim] ) const;
+	int depth( void ) const;
+	template< class Real > void centerAndWidth( Point< Real , Dim >& center , Real& width ) const;
+	template< class Real > void startAndWidth( Point< Real , Dim >& start , Real& width ) const;
+	template< class Real > bool isInside( Point< Real , Dim > p ) const;
+
+	size_t leaves( void ) const;
+	size_t maxDepthLeaves( int maxDepth ) const;
+	size_t nodes( void ) const;
+	int maxDepth( void ) const;
+
+	const RegularTreeNode* root( void ) const;
+
+	const RegularTreeNode* nextLeaf( const RegularTreeNode* currentLeaf=NULL ) const;
+	RegularTreeNode* nextLeaf( RegularTreeNode* currentLeaf=NULL );
+
+	// This lambda takes a RegularTreeNode* as an argument and returns true if we do not need to traverse the tree beyond the specified node.
+	template< typename NodeTerminationLambda >
+	const RegularTreeNode* nextNode( NodeTerminationLambda &ntl , const RegularTreeNode* currentNode ) const;
+	template< typename NodeTerminationLambda >
+	RegularTreeNode* nextNode( NodeTerminationLambda &ntl , RegularTreeNode* currentNode );
+
+	const RegularTreeNode* nextNode( const RegularTreeNode* currentNode=NULL ) const;
+	RegularTreeNode* nextNode( RegularTreeNode* currentNode=NULL );
+	const RegularTreeNode* nextBranch( const RegularTreeNode* current ) const;
+	RegularTreeNode* nextBranch( RegularTreeNode* current );
+	const RegularTreeNode* prevBranch( const RegularTreeNode* current ) const;
+	RegularTreeNode* prevBranch( RegularTreeNode* current );
+
+	void setFullDepth( int maxDepth , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+
+	void printLeaves( void ) const;
+	void printRange( void ) const;
+
+	template< class Real > static int ChildIndex( const Point< Real , Dim >& center , const Point< Real , Dim > &p );
+
+	bool write( const char* fileName ) const;
+	bool write( FILE* fp ) const;
+	bool read( const char* fileName , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+	bool read( FILE* fp , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+
+	template< typename Pack > struct Neighbors{};
+	template< unsigned int ... Widths >
+	struct Neighbors< UIntPack< Widths ... > >
+	{
+		typedef StaticWindow< RegularTreeNode* , UIntPack< Widths ... > > Window;
+		Window neighbors;
+		Neighbors( void );
+		void clear( void );
+	};
+	template< typename Pack > struct ConstNeighbors{};
+	template< unsigned int ... Widths >
+	struct ConstNeighbors< UIntPack< Widths ... > >
+	{
+		typedef StaticWindow< const RegularTreeNode* , UIntPack< Widths ... > > Window;
+		Window neighbors;
+		ConstNeighbors( void );
+		void clear( void );
+	};
+
+	template< typename LeftPack , typename RightPack > struct NeighborKey{};	// Primary template is empty; see the UIntPack specialization
+	template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+	struct NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >
+	{
+	protected:
+		static_assert( sizeof...(LeftRadii)==sizeof...(RightRadii) , "[ERROR] Left and right radii dimensions don't match" );
+		static const unsigned int CenterIndex = WindowIndex< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > , UIntPack< LeftRadii ... > >::Index;	// presumably the flat index of the window's central entry -- confirm against WindowIndex
+		int _depth;	// depth passed to the last set() call; -1 for a default-constructed key
+
+		template< bool CreateNodes , unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+		static unsigned int _NeighborsLoop( UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > , ConstWindowSlice< RegularTreeNode* , UIntPack< ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int cIdx , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer );
+		template< bool CreateNodes , unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+		static unsigned int _NeighborsLoop( UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > ,      WindowSlice< RegularTreeNode* , UIntPack< ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int cIdx , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer );
+
+		template< bool CreateNodes , typename PLeft , typename PRight , typename CLeft , typename CRight > struct _Run{};	// Helper invoked by _NeighborsLoop; specialized on multi-entry and single-entry radius packs
+
+		template< bool CreateNodes , unsigned int _PLeftRadius , unsigned int ... _PLeftRadii , unsigned int _PRightRadius , unsigned int ... _PRightRadii , unsigned int _CLeftRadius , unsigned int ... _CLeftRadii , unsigned int _CRightRadius , unsigned int ... _CRightRadii >
+		struct _Run< CreateNodes , UIntPack< _PLeftRadius , _PLeftRadii ... > , UIntPack< _PRightRadius , _PRightRadii ... > , UIntPack< _CLeftRadius , _CLeftRadii ... > , UIntPack< _CRightRadius , _CRightRadii ... > >
+		{
+			static unsigned int Run( ConstWindowSlice< RegularTreeNode* , UIntPack< _PLeftRadius+_PRightRadius+1 , ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< _CLeftRadius+_CRightRadius+1 , ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int* c , int cornerIndex , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer );
+		};
+		template< bool CreateNodes , unsigned int _PLeftRadius , unsigned int _PRightRadius , unsigned int _CLeftRadius , unsigned int _CRightRadius >
+		struct _Run< CreateNodes , UIntPack< _PLeftRadius > , UIntPack< _PRightRadius > , UIntPack< _CLeftRadius > , UIntPack< _CRightRadius > >
+		{
+			static unsigned int Run( ConstWindowSlice< RegularTreeNode* , UIntPack< _PLeftRadius+_PRightRadius+1 > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< _CLeftRadius+_CRightRadius+1 > > cNeighbors , int* c , int cornerIndex , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer );
+		};
+	public:
+		typedef Neighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > > NeighborType;
+		NeighborType* neighbors;	// heap array owned by the key: set(d) allocates d+1 entries, the destructor releases them
+		// NOTE(review): a copy constructor and destructor are declared but no copy-assignment operator;
+		// assigning one key to another would share (and later double-delete) this array -- confirm keys are never assigned.
+		NeighborKey( void );
+		NeighborKey( const NeighborKey& key );
+		~NeighborKey( void );
+		int depth( void ) const { return _depth; }
+
+		void set( int depth );	// Discards the current array and re-allocates it for depths 0..depth (nothing is allocated when depth<0)
+
+		template< bool CreateNodes >
+		typename RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::template Neighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& getNeighbors( RegularTreeNode* node , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+
+		NeighborType& getNeighbors( const RegularTreeNode* node ) { return getNeighbors< false >( (RegularTreeNode*)node , NULL , std::function< void ( RegularTreeNode& ) >() ); }	// const-node convenience: forwards with CreateNodes=false and no allocator
+
+		template< bool CreateNodes , unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > ,       RegularTreeNode* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+		template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >	// const-node convenience: forwards to the overload above with CreateNodes=false, passing the caller's output window through
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors ){ return getNeighbors< false >( UIntPack< _LeftRadii ... >() , UIntPack< _RightRadii ... >() , (RegularTreeNode*)node , neighbors , NULL , std::function< void ( RegularTreeNode& ) >() ); }
+		template< bool CreateNodes , unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > ,       RegularTreeNode* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& pNeighbors , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() );
+		template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >	// const-node convenience: forwards to the overload above with CreateNodes=false, passing both output windows through
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& pNeighbors , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors ){ return getNeighbors< false >( UIntPack< _LeftRadii ... >() , UIntPack< _RightRadii ... >() , (RegularTreeNode*)node , pNeighbors , neighbors , NULL , std::function< void ( RegularTreeNode& ) >() ); }
+
+		template< bool CreateNodes >
+		unsigned int getChildNeighbors( int cIdx , int d , NeighborType& childNeighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() ) const;
+		unsigned int getChildNeighbors( int cIdx , int d , NeighborType& childNeighbors ) const { return getChildNeighbors< false >( cIdx , d , childNeighbors , NULL , std::function< void ( RegularTreeNode& ) >() ); }
+
+		template< bool CreateNodes , class Real >
+		unsigned int getChildNeighbors( Point< Real , Dim > p , int d , NeighborType& childNeighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer=std::function< void ( RegularTreeNode& ) >() ) const;
+		template< class Real >
+		unsigned int getChildNeighbors( Point< Real , Dim > p , int d , NeighborType& childNeighbors ) const { return getChildNeighbors< false , Real >( p , d , childNeighbors , NULL , std::function< void ( RegularTreeNode& ) >() ); }
+	};
+
+	template< typename LeftPack , typename RightPack > struct ConstNeighborKey{};	// Primary template is empty; see the UIntPack specialization
+
+	template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+	struct ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >
+	{
+	protected:
+		static_assert( sizeof...(LeftRadii)==sizeof...(RightRadii) , "[ERROR] Left and right radii dimensions don't match" );
+		static const unsigned int CenterIndex = WindowIndex< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > , UIntPack< LeftRadii ... > >::Index;	// presumably the flat index of the window's central entry -- confirm against WindowIndex
+		int _depth;	// value reported by depth()
+
+		template< unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+		static unsigned int _NeighborsLoop( UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > , ConstWindowSlice< const RegularTreeNode* , UIntPack< ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int cIdx );
+		template< unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+		static unsigned int _NeighborsLoop( UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > , WindowSlice< const RegularTreeNode* , UIntPack< ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int cIdx );
+
+		template< typename PLeft , typename PRight , typename CLeft , typename CRight > struct _Run{};	// Primary template is empty; specialized below on multi-entry and single-entry radius packs
+
+		template< unsigned int _PLeftRadius , unsigned int ... _PLeftRadii , unsigned int _PRightRadius , unsigned int ... _PRightRadii , unsigned int _CLeftRadius , unsigned int ... _CLeftRadii , unsigned int _CRightRadius , unsigned int ... _CRightRadii >
+		struct _Run< UIntPack< _PLeftRadius , _PLeftRadii ... > , UIntPack< _PRightRadius , _PRightRadii ... > , UIntPack< _CLeftRadius , _CLeftRadii ... > , UIntPack< _CRightRadius , _CRightRadii ... > >
+		{
+			static unsigned int Run( ConstWindowSlice< const RegularTreeNode* , UIntPack< _PLeftRadius + _PRightRadius + 1 , ( _PLeftRadii+_PRightRadii+1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< _CLeftRadius + _CRightRadius + 1 , ( _CLeftRadii+_CRightRadii+1 ) ... > > cNeighbors , int* c , int cornerIndex );
+		};
+		template< unsigned int _PLeftRadius , unsigned int _PRightRadius , unsigned int _CLeftRadius , unsigned int _CRightRadius >
+		struct _Run< UIntPack< _PLeftRadius > , UIntPack< _PRightRadius > , UIntPack< _CLeftRadius > , UIntPack< _CRightRadius > >
+		{
+			static unsigned int Run( ConstWindowSlice< const RegularTreeNode* , UIntPack< _PLeftRadius+_PRightRadius+1 > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< _CLeftRadius+_CRightRadius+1 > > cNeighbors , int* c , int cornerIndex );
+		};
+
+	public:
+		// Unlike NeighborKey, no member declared here takes an allocator or initializer, and all windows hold const node pointers.
+		typedef ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > > NeighborType;
+		NeighborType* neighbors;	// NOTE(review): presumably a per-depth array managed by set(), as in NeighborKey -- the definitions are not in this part of the file
+
+		ConstNeighborKey( void );
+		ConstNeighborKey( const ConstNeighborKey& key );
+		~ConstNeighborKey( void );
+		ConstNeighborKey& operator = ( const ConstNeighborKey& key );
+
+		int depth( void ) const { return _depth; }
+		void set( int depth );
+
+		typename RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::template ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& getNeighbors( const RegularTreeNode* node );
+		template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode* node , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors );
+		template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+		void getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode* node , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& pNeighbors , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors );
+		unsigned int getChildNeighbors( int cIdx , int d , NeighborType& childNeighbors ) const;
+		template< class Real >
+		unsigned int getChildNeighbors( Point< Real , Dim > p , int d , ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& childNeighbors ) const;
+	};
+
+	int width( int maxDepth ) const;
+};
+
+#include "RegularTree.inl"
+
+#endif // REGULAR_TREE_NODE_INCLUDED
diff --git a/Src/RegularTree.inl b/Src/RegularTree.inl
new file mode 100644
index 0000000..bafec9a
--- /dev/null
+++ b/Src/RegularTree.inl
@@ -0,0 +1,797 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#include <stdlib.h>
+#include <math.h>
+#include <algorithm>
+
+/////////////////////
+// RegularTreeNode //
+/////////////////////
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Constructs a detached node and then runs the optional initializer on it
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::RegularTreeNode( std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	// Start with no parent and no children, at depth 0 with an all-zero offset
+	children = NULL , parent = NULL , _depth = 0;
+	memset( _offset , 0 , sizeof(_offset) );
+	if( Initializer ) Initializer( *this );	// an empty std::function means "no initializer"
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Recursively releases the sub-tree below this node, then clears this node's parent and children pointers
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::cleanChildren( Allocator< RegularTreeNode >* nodeAllocator )
+{
+	RegularTreeNode* brood = children;
+	const int broodSize = 1<<Dim;
+	// Descendants are cleaned before the array that holds their parents is released
+	for( int i=0 ; brood && i<broodSize ; i++ ) brood[i].cleanChildren( nodeAllocator );
+	if( brood && !nodeAllocator ) delete[] brood;	// when an allocator is supplied the memory is left to it
+	children = NULL , parent = NULL;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Destructor: clears the link pointers only
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::~RegularTreeNode(void)
+{
+#ifdef SHOW_WARNINGS
+#pragma message( "[WARNING] Deallocation of children is your responsibility" )
+#endif // SHOW_WARNINGS
+	parent = children = NULL;	// the children array is NOT freed here; callers must use cleanChildren() beforehand
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Allocates a stand-alone array of 2^Dim nodes, each at depth 0 with its index bits as offset
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NewBrood( Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	RegularTreeNode* brood;
+	if( !nodeAllocator ) brood = new RegularTreeNode[ 1<<Dim ];
+	else                 brood = nodeAllocator->newElements( 1<<Dim );
+	for( RegularTreeNode* node=brood ; node<brood+(1<<Dim) ; node++ )
+	{
+		if( Initializer ) Initializer( *node );
+		node->_depth = 0;	// written after the initializer, so the initializer cannot override it
+		for( int d=0 ; d<Dim ; d++ ) node->_offset[d] = ( (int)( node-brood )>>d ) & 1;	// bit d of the node's index is its offset along axis d
+	}
+	return brood;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Rewrites _depth/_offset of every node in the sub-tree under root, taking (d,off) as root's depth and offset
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ResetDepthAndOffset( RegularTreeNode* root , int d , int off[Dim] )
+{
+	std::function< void ( int& , int[Dim] ) > ParentDepthAndOffset = [] ( int& d , int off[Dim] ){ d-- ; for( int _d=0 ; _d<Dim ; _d++ ) off[_d]>>=1 ; };	// child -> parent: one level up, drop the low offset bit
+	std::function< void ( int& , int[Dim] ) >  ChildDepthAndOffset = [] ( int& d , int off[Dim] ){ d++ ; for( int _d=0 ; _d<Dim ; _d++ ) off[_d]<<=1 ; };	// parent -> first child: one level down, append a zero bit
+	std::function< RegularTreeNode* ( RegularTreeNode* , int& , int[] ) > _nextBranch = [&]( RegularTreeNode* current , int& d , int off[Dim] )	// std::function (not auto) because the lambda calls itself
+	{
+		if( current==root ) return (RegularTreeNode*)NULL;	// climbed back to the root: traversal is over
+		else
+		{
+			int c = (int)( current - current->parent->children );	// index of current among its siblings
+
+			if( c==(1<<Dim)-1 )
+			{
+				ParentDepthAndOffset( d , off );	// last sibling: continue from the parent's next branch
+				return _nextBranch( current->parent , d , off );
+			}
+			else
+			{
+				ParentDepthAndOffset( d , off ) ; ChildDepthAndOffset( d , off );	// up then down: same depth, low offset bits cleared
+				for( int _d=0 ; _d<Dim ; _d++ ) off[_d] |= ( ( (c+1)>>_d ) & 1 );	// install the bits of the next sibling's index
+				return current+1;
+			}
+		}
+	};
+	auto _nextNode = [&]( RegularTreeNode* current , int& d , int off[Dim] )	// pre-order successor that keeps (d,off) in step with the returned node
+	{
+		if( !current ) return root;	// NULL starts the traversal at root, with (d,off) as passed in
+		else if( current->children )
+		{
+			ChildDepthAndOffset( d , off );
+			return current->children;
+		}
+		else return _nextBranch( current , d , off );
+	};
+	for( RegularTreeNode* node=_nextNode( NULL , d , off ) ; node ; node = _nextNode( node , d , off ) )	// note: off is the caller's array and is modified in place during the walk
+	{
+		node->_depth = (DepthAndOffsetType)d;
+		for( int _d=0 ; _d<Dim ; _d++ ) node->_offset[_d] = (DepthAndOffsetType)off[_d];
+	}
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Ensures every node within maxDepth levels below this one has children
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::setFullDepth( int maxDepth , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	if( maxDepth<=0 ) return;	// no further levels requested
+	// Existing children are kept; a brood is created only where one is missing
+	if( !children ) initChildren( nodeAllocator , Initializer );
+	const int remaining = maxDepth-1;
+	for( int c=0 ; c<(1<<Dim) ; c++ ) children[c].setFullDepth( remaining , nodeAllocator , Initializer );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Gives this node a fresh brood of 2^Dim children and sets their parent, depth and offset; returns 1
+int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::initChildren( Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	if( !nodeAllocator )
+	{
+		delete[] children;	// heap path: drop any previous array (deleting NULL is a no-op); its descendants are not visited
+		children = new RegularTreeNode[ 1<<Dim ];
+	}
+	else children = nodeAllocator->newElements( 1<<Dim );	// allocator path: the previous pointer is simply overwritten
+	if( !children ) ERROR_OUT( "Failed to initialize children in RegularTreeNode::initChildren" );
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+	{
+		RegularTreeNode& child = children[c];
+		child.parent = this , child.children = NULL;
+		if( Initializer ) Initializer( child );
+		child._depth = _depth+1;
+		for( int d=0 ; d<Dim ; d++ ) child._offset[d] = (_offset[d]<<1) | ( (c>>d) & 1 );	// parent's offset doubled, plus bit d of the child index
+	}
+	return 1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Merges the sub-tree rooted at node into this one: nodeData is combined with f, missing broods are taken over from node
+template< class MergeFunctor >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::merge( RegularTreeNode* node , MergeFunctor& f )
+{
+	if( node )	// merging with NULL leaves this node untouched
+	{
+		nodeData = f( nodeData , node->nodeData );
+		if( children && node->children ) for( int c=0 ; c<(1<<Dim) ; c++ ) children[c].merge( node->children+c , f );	// both sides refined: recurse child by child (merge() takes a pointer, so pass the child's address)
+		else if( node->children )
+		{
+			children = node->children;	// only the other side is refined: adopt its brood wholesale
+			for( int c=0 ; c<(1<<Dim) ; c++ ) children[c].parent = this;
+			node->children = NULL;	// ownership has moved; node must no longer reference the brood
+		}
+	}
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Copies the node's depth and per-axis offset into the caller's int variables
+inline void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::depthAndOffset( int& depth , int offset[Dim] ) const
+{
+	depth = (int)_depth;
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) offset[axis] = (int)_offset[axis];
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Fills index[] with BinaryNode::CenterIndex evaluated at this node's depth and each axis offset
+inline void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::centerIndex( int index[Dim] ) const
+{
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) index[axis] = BinaryNode::CenterIndex( _depth , _offset[axis] );
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Returns the stored depth of this node as an int
+inline int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::depth( void ) const { return (int)_depth; }
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Outputs the node's width (1/2^depth) and the center of its cell along each axis
+template< class Real >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::centerAndWidth( Point< Real , Dim >& center , Real& width ) const
+{
+	width = Real( 1.0 / (1<<_depth) );
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) center[axis] = Real( 0.5+_offset[axis] ) * width;	// (offset + 1/2) cells from the origin
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Outputs the node's width (1/2^depth) and the low corner of its cell along each axis
+template< class Real >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::startAndWidth( Point< Real , Dim >& start , Real& width ) const
+{
+	width = Real( 1.0 / (1<<_depth) );
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) start[axis] = Real( _offset[axis] ) * width;	// offset cells from the origin
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Tests whether p lies in the node's cell; each axis interval is open below and closed above
+template< class Real >
+bool RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::isInside( Point< Real , Dim > p ) const
+{
+	Point< Real , Dim > center ; Real halfWidth;
+	centerAndWidth( center , halfWidth ) , halfWidth /= 2;	// centerAndWidth yields the full width; halve it here
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) if( p[axis]<=(center[axis]-halfWidth) || p[axis]>(center[axis]+halfWidth) ) return false;
+	return true;
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Height of the sub-tree below this node, in levels
+int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::maxDepth(void) const
+{
+	// A node without children has height zero
+	if( !children ) return 0;
+
+	// Otherwise the height is one more than that of the tallest child
+	int deepest = children[0].maxDepth();
+	for( int c=1 ; c<(1<<Dim) ; c++ )
+	{
+		const int childDepth = children[c].maxDepth();
+		if( childDepth>deepest ) deepest = childDepth;
+	}
+	return deepest+1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Number of nodes in the sub-tree rooted here, this node included
+size_t RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nodes( void ) const
+{
+	size_t count = 1;	// this node itself
+	if( children )
+	{
+		const RegularTreeNode* end = children + (1<<Dim);
+		for( const RegularTreeNode* child=children ; child!=end ; child++ ) count += child->nodes();
+	}
+	return count;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Number of childless nodes in the sub-tree rooted here
+size_t RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::leaves( void ) const
+{
+	// A childless node is itself the single leaf of its sub-tree
+	if( !children ) return 1;
+
+	// An interior node is not counted; only its children contribute
+	size_t count = 0;
+	for( int c=0 ; c<(1<<Dim) ; c++ ) count += children[c].leaves();
+	return count;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Number of childless nodes in this sub-tree whose depth does not exceed maxDepth
+size_t RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::maxDepthLeaves( int maxDepth ) const
+{
+	if( depth()>maxDepth ) return 0;	// past the depth limit: contributes nothing
+	if( !children ) return 1;
+	size_t total = 0;
+	for( int c=0 ; c<(1<<Dim) ; c++ )
+	{
+		total += children[c].maxDepthLeaves( maxDepth );
+	}
+	return total;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Follows parent links upward and returns the node that has no parent
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::root( void ) const
+{
+	const RegularTreeNode* top;
+	for( top=this ; top->parent ; top=top->parent ){ }
+	return top;
+}
+
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Next sibling of current, or of its nearest ancestor that has one; NULL once this node (or a parentless node) is reached
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextBranch( const RegularTreeNode* current ) const
+{
+	if( current==this || !current->parent ) return NULL;
+	const bool isLastSibling = ( current-current->parent->children )==( (1<<Dim)-1 );
+	return isLastSibling ? nextBranch( current->parent ) : current+1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Non-const counterpart: next sibling of current or of its nearest ancestor that has one; NULL at this node or a parentless node
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextBranch(RegularTreeNode* current)
+{
+	if( current==this || !current->parent ) return NULL;
+	return ( current-current->parent->children==(1<<Dim)-1 ) ? nextBranch( current->parent ) : current+1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Previous sibling of current, or of its nearest ancestor that has one; NULL once this node (or a parentless node) is reached
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::prevBranch( const RegularTreeNode* current ) const
+{
+	if( current==this || !current->parent ) return NULL;
+	const bool isFirstSibling = ( current==current->parent->children );
+	return isFirstSibling ? prevBranch( current->parent ) : current-1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Non-const counterpart: previous sibling of current or of its nearest ancestor that has one; NULL at this node or a parentless node
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::prevBranch( RegularTreeNode* current )
+{
+	if( current==this || !current->parent ) return NULL;
+	const bool isFirstSibling = ( current==current->parent->children );
+	return isFirstSibling ? prevBranch( current->parent ) : current-1;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Leaf iteration over this sub-tree: NULL yields the first leaf, a leaf yields the following one, NULL is returned at the end
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextLeaf(const RegularTreeNode* current) const
+{
+	if( !current )
+	{
+		const RegularTreeNode* leaf;
+		for( leaf=this ; leaf->children ; leaf=leaf->children ){ }	// keep taking the first child until a childless node is reached
+		return leaf;
+	}
+	if( current->children ) return current->nextLeaf();	// interior node: its own first leaf
+	const RegularTreeNode* branch = nextBranch( current );
+	return branch ? branch->nextLeaf() : NULL;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Non-const leaf iteration: NULL yields the first leaf, a leaf yields the following one, NULL is returned at the end
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextLeaf(RegularTreeNode* current)
+{
+	if( !current )
+	{
+		RegularTreeNode* leaf;
+		for( leaf=this ; leaf->children ; leaf=leaf->children ){ }	// keep taking the first child until a childless node is reached
+		return leaf;
+	}
+	if( current->children ) return current->nextLeaf();	// interior node: its own first leaf
+	RegularTreeNode* branch = nextBranch( current );
+	return branch ? branch->nextLeaf() : NULL;
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Pre-order successor of current within this sub-tree; children of nodes for which ntl returns true are skipped
+template< typename NodeTerminationLambda >
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextNode( NodeTerminationLambda &ntl , const RegularTreeNode *current ) const
+{
+	if( !current ) return this;	// NULL starts the traversal at this node
+	const bool descend = current->children && !ntl(current);	// ntl is consulted only for nodes that have children
+	return descend ? current->children : nextBranch( current );
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Non-const pre-order successor of current; children of nodes for which ntl returns true are skipped
+template< typename NodeTerminationLambda >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextNode( NodeTerminationLambda &ntl , RegularTreeNode* current )
+{
+	if( !current ) return this;	// NULL starts the traversal at this node
+	const bool descend = current->children && !ntl(current);	// ntl is consulted only for nodes that have children
+	return descend ? current->children : nextBranch( current );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Pre-order successor of current within this sub-tree (NULL starts at this node)
+const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextNode( const RegularTreeNode* current ) const
+{
+	if( !current ) return this;
+	if( current->children ) return current->children;	// descend before moving sideways
+	return nextBranch( current );
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Non-const pre-order successor of current within this sub-tree (NULL starts at this node)
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::nextNode( RegularTreeNode* current )
+{
+	if( !current ) return this;
+	if( current->children ) return current->children;	// descend before moving sideways
+	return nextBranch( current );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Prints the node's cell as per-axis intervals "[lo,hi]" joined by " x " and ended by a newline
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::printRange(void) const
+{
+	Point< float , Dim > center ; float width;
+	centerAndWidth( center , width );
+	for( int d=0 ; d<Dim ; d++ )
+	{
+		printf( "[%f,%f]" , center[d]-width/2 , center[d]+width/2 );
+		// The last axis ends the line; every other axis is followed by the separator
+		if( d>=Dim-1 ) printf("\n");
+		else printf( " x " );
+	}
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Returns the index whose bit d is set exactly when p is strictly greater than center along axis d
+template< class Real >
+int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ChildIndex( const Point< Real , Dim >& center , const Point< Real , Dim >& p )
+{
+	int index = 0;
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) index |= ( p[axis]>center[axis] ? 1 : 0 )<<axis;
+	return index;
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Writes this sub-tree to a binary file; returns false if the file cannot be opened, written or closed
+bool RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::write( const char* fileName ) const
+{
+	FILE* fp = fopen( fileName , "wb" );
+	if( !fp ) return false;
+	const bool written = write( fp );
+	const bool closed = ( fclose( fp )==0 );	// fclose flushes buffered output, so a failure here means data was lost
+	return written && closed;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Writes this node's raw bytes, then its children recursively in index order; returns false at the first failed write
+bool RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::write( FILE* fp ) const
+{
+	if( fwrite( this , sizeof( RegularTreeNode< Dim , NodeData , DepthAndOffsetType > ) , 1 , fp )!=1 ) return false;	// the object is dumped as-is, pointer members included
+	if( children ) for( int i=0 ; i<(1<<Dim) ; i++ ) if( !children[i].write(fp) ) return false;	// propagate failures instead of discarding them
+	return true;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Opens fileName for binary reading and loads the sub-tree from it; false if the file cannot be opened
+bool RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::read( const char* fileName , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	FILE* stream = fopen( fileName , "rb" );
+	if( stream==NULL ) return false;
+	const bool success = read( stream , nodeAllocator , Initializer );
+	fclose( stream );
+	return success;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Reads this node, then (recursively) its descendants, from fp
+bool RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::read( FILE* fp , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	if( fread( this , sizeof( RegularTreeNode< Dim , NodeData , DepthAndOffsetType > ) , 1 , fp )!=1 ) ERROR_OUT( "Failed to read node" );	// raw overwrite of the whole object, pointer members included
+	parent = NULL;	// the parent pointer read from the file is not kept
+	if( children )	// the stored children pointer is used only as a "has children" flag
+	{
+		children = NULL;	// cleared first so that initChildren does not delete[] the stale pointer
+		initChildren( nodeAllocator , Initializer );
+		for( int i=0 ; i<(1<<Dim) ; i++ ) children[i].read( fp , nodeAllocator , Initializer ) , children[i].parent = this;	// each child's read resets its parent to NULL, so it is re-linked afterwards
+	}
+	return true;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Returns 2^(maxDepth - depth of this node)
+int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::width( int maxDepth ) const
+{
+	const int levelsBelow = maxDepth - depth();
+	return 1<<levelsBelow;
+}
+
+////////////////////////////////
+// RegularTreeNode::Neighbors //
+////////////////////////////////
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Checks at compile time that the window has one width per tree dimension, then empties it
+template< unsigned int ... Widths >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::Neighbors< UIntPack< Widths ... > >::Neighbors( void ){ static_assert( Dim==sizeof...(Widths) , "[ERROR] Window and tree dimensions don't match" ) ; this->clear(); }
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Resets every pointer stored in the window to NULL
+template< unsigned int ... Widths >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::Neighbors< UIntPack< Widths ... > >::clear( void ){ for( unsigned int slot=0 ; slot<WindowSize< UIntPack< Widths ... > >::Size ; slot++ ) neighbors.data[slot] = NULL; }
+
+/////////////////////////////////////
+// RegularTreeNode::ConstNeighbors //
+/////////////////////////////////////
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Checks at compile time that the window has one width per tree dimension, then empties it
+template< unsigned int ... Widths >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighbors< UIntPack< Widths ... > >::ConstNeighbors( void ){ static_assert( Dim==sizeof...(Widths) , "[ERROR] Window and tree dimensions don't match" ) ; this->clear(); }
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Resets every pointer stored in the window to NULL
+template< unsigned int ... Widths >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighbors< UIntPack< Widths ... > >::clear( void ){ for( unsigned int slot=0 ; slot<WindowSize< UIntPack< Widths ... > >::Size ; slot++ ) neighbors.data[slot] = NULL; }
+
+//////////////////////////////////
+// RegularTreeNode::NeighborKey //
+//////////////////////////////////
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Default key: no neighbor array, depth -1
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::NeighborKey( void ){ neighbors = NULL ; _depth = -1; }
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Copy constructor: allocates an array for key's depth and copies key's per-depth windows byte-wise
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::NeighborKey( const NeighborKey& key )
+{
+	neighbors = NULL , _depth = 0;	// set() inspects neighbors, so it must be initialized before the call
+	set( key._depth );
+	if( _depth>=0 ) memcpy( neighbors , key.neighbors , sizeof( NeighborType )*( _depth+1 ) );	// entries 0.._depth are contiguous, so one bulk copy covers them
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Releases the per-depth neighbor array owned by the key
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::~NeighborKey( void )
+{
+	delete[] neighbors;	// deleting a NULL pointer is a no-op
+	neighbors = NULL;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >	// Re-sizes the key for depths 0..d, discarding whatever it held before
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::set( int d )
+{
+	// Drop the old array first; the pointer is reset before allocating so it never dangles
+	delete[] neighbors;
+	neighbors = NULL , _depth = d;
+	// A negative depth leaves the key without an array
+	if( d>=0 ) neighbors = new NeighborType[d+1];
+}
+// Splits the child index cIdx into one bit per axis and hands the parent/child windows to _Run, starting with a corner index of 0; returns whatever _Run reports.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_NeighborsLoop( UIntPack< _PLeftRadii ... > pLeftRadii , UIntPack< _PRightRadii ... > pRightRadii , UIntPack< _CLeftRadii ... > cLeftRadii , UIntPack< _CRightRadii ... > cRightRadii , ConstWindowSlice< RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int cIdx , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	static_assert( Dim==sizeof ... ( _PLeftRadii ) && Dim==sizeof ... ( _PRightRadii ) && Dim==sizeof ... ( _CLeftRadii ) && Dim==sizeof ... ( _CRightRadii ) , "[ERROR] Dimensions don't match" );
+	int cornerBits[Dim];
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) cornerBits[axis] = ( cIdx>>axis ) & 1;
+	return _Run< CreateNodes , UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > >::Run( pNeighbors , cNeighbors , cornerBits , 0 , nodeAllocator , Initializer );
+}
+// Convenience overload for a mutable parent slice: casts it to the const slice type and forwards to the ConstWindowSlice overload.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_NeighborsLoop( UIntPack< _PLeftRadii ... > pLeftRadii , UIntPack< _PRightRadii ... > pRightRadii , UIntPack< _CLeftRadii ... > cLeftRadii , UIntPack< _CRightRadii ... > cRightRadii , WindowSlice< RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int cIdx , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	return _NeighborsLoop< CreateNodes >( pLeftRadii , pRightRadii , cLeftRadii , cRightRadii , ( ConstWindowSlice< RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > )pNeighbors , cNeighbors , cIdx , nodeAllocator , Initializer );
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int _PLeftRadius , unsigned int ... _PLeftRadii , unsigned int _PRightRadius , unsigned int ... _PRightRadii , unsigned int _CLeftRadius , unsigned int ... _CLeftRadii , unsigned int _CRightRadius , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_Run< CreateNodes , UIntPack< _PLeftRadius , _PLeftRadii ... > , UIntPack< _PRightRadius , _PRightRadii ... > , UIntPack< _CLeftRadius , _CLeftRadii ... > , UIntPack< _CRightRadius , _CRightRadii ... > >::Run( ConstWindowSlice< RegularTreeNode* , UIntPack< _PLeftRadius + _PRightRadius + 1 , ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< _CLeftRadius + _CRightRadius + 1 , ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int* c , int cornerIndex , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{	// Recursive step: peels off the leading radius of each pack, walks the child offsets along one axis, and recurses on the remaining axes. Returns the sum of the counts returned by the nested calls.
+	static const int D = sizeof ... ( _PLeftRadii ) + 1;	// number of axes still to process (this one included); this step works on axis Dim-D
+	unsigned int count=0;
+	for( int i=-(int)_CLeftRadius ; i<=(int)_CRightRadius ; i++ )
+	{
+		int _i = (i+c[Dim-D]) + ( _CLeftRadius<<1 ) , pi = ( _i>>1 ) - _CLeftRadius + _PLeftRadius  , ci = i + _CLeftRadius;	// _i: offset i plus this axis' bit c[Dim-D], shifted by 2*_CLeftRadius; pi indexes the parent slice (from _i>>1), ci indexes the child slice
+		count += _Run< CreateNodes , UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > >::Run( pNeighbors[pi] , cNeighbors[ci] , c , cornerIndex | ( ( _i&1)<<(Dim-D) ) , nodeAllocator , Initializer );	// the low bit of _i becomes bit Dim-D of the corner index passed down
+	}
+	return count;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int _PLeftRadius , unsigned int _PRightRadius , unsigned int _CLeftRadius , unsigned int _CRightRadius >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_Run< CreateNodes , UIntPack< _PLeftRadius > , UIntPack< _PRightRadius > , UIntPack< _CLeftRadius > , UIntPack< _CRightRadius > >::Run( ConstWindowSlice< RegularTreeNode* , UIntPack< _PLeftRadius+_PRightRadius+1 > > pNeighbors , WindowSlice< RegularTreeNode* , UIntPack< _CLeftRadius+_CRightRadius+1 > > cNeighbors , int* c , int cornerIndex , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{	// Terminal step (single remaining axis, index Dim-1): writes every entry of the child slice (a child pointer or NULL) and returns how many non-NULL entries were written.
+	static const int D = 1;	// not referenced below; the body uses Dim-1 directly
+	unsigned int count=0;
+	for( int i=-(int)_CLeftRadius ; i<=(int)_CRightRadius ; i++ )
+	{
+		int _i = (i+c[Dim-1]) + ( _CLeftRadius<<1 ) , pi = ( _i>>1 ) - _CLeftRadius + _PLeftRadius  , ci = i + _CLeftRadius;	// _i: offset i plus the last axis' bit, shifted by 2*_CLeftRadius; pi indexes the parent slice, ci the child slice
+		if( CreateNodes )	// creating variant: every existing parent neighbor yields a child entry
+		{
+			if( pNeighbors[pi] )
+			{
+				if( !pNeighbors[pi]->children )	// children are missing: test again inside the named OpenMP critical section before calling initChildren()
+#pragma omp critical ( RegularTreeNode__NeighborKey__Run )
+					if( !pNeighbors[pi]->children ) pNeighbors[pi]->initChildren( nodeAllocator , Initializer );
+				cNeighbors[ci] = pNeighbors[pi]->children + ( cornerIndex | ( ( _i&1)<<(Dim-1) ) );	// child selected by the accumulated corner bits plus this axis' bit
+				count++;
+			}
+			else cNeighbors[ci] = NULL;
+		}
+		else	// non-creating variant: only parents that already have children contribute
+		{
+			if( pNeighbors[pi] && pNeighbors[pi]->children ) cNeighbors[ci] = pNeighbors[pi]->children + ( cornerIndex | ( ( _i&1)<<(Dim-1) ) ) , count++;
+			else cNeighbors[ci] = NULL;
+		}
+	}
+	return count;
+}
+// Fills cNeighbors with the window around child cIdx of the node cached as the center at depth d of this key.
+// Returns 0 without touching cNeighbors when no center node is cached at that depth; otherwise returns the value reported by _NeighborsLoop.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getChildNeighbors( int cIdx , int d , NeighborType& cNeighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer ) const
+{
+	NeighborType& parentWindow = neighbors[d];
+	// Nothing to derive children from when the cached window has no center node
+	if( parentWindow.neighbors.data[ CenterIndex ]==NULL ) return 0;
+	return _NeighborsLoop< CreateNodes >( UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , parentWindow.neighbors() , cNeighbors.neighbors() , cIdx , nodeAllocator , Initializer );
+}
+// Point-based overload: obtains the child index from CornerIndex( center , p ), using the center of the node cached at depth d, then defers to the index-based overload.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , class Real >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getChildNeighbors( Point< Real , Dim > p , int d , NeighborType& cNeighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer ) const
+{
+	NeighborType& parentWindow = neighbors[d];
+	// Without a cached center node there is no cell to locate p in
+	if( parentWindow.neighbors.data[ CenterIndex ]==NULL ) return 0;
+	Real width;
+	Point< Real , Dim > center;
+	parentWindow.neighbors.data[ CenterIndex ]->centerAndWidth( center , width );
+	return getChildNeighbors< CreateNodes >( CornerIndex( center , p ) , d , cNeighbors , nodeAllocator , Initializer );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes >
+typename RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::template Neighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* node , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{	// Returns the window stored at this->neighbors[node->depth()], rebuilding it (recursively, from the parent's window) when node is not its cached center. NOTE(review): node->depth() is not checked against _depth here.
+	NeighborType& neighbors = this->neighbors[node->depth()];
+	// If node is already the cached center but some slot of its window is still NULL, drop the cached center so that the window is rebuilt below (required in case neighbors have been constructed between the last call to getNeighbors and this one)
+	if( node==neighbors.neighbors.data[ CenterIndex ] )
+	{
+		bool reset = false;
+		for( int i=0 ; i<WindowSize< UIntPack< ( LeftRadii+RightRadii+1 ) ... > >::Size ; i++ ) if( !neighbors.neighbors.data[i] ) reset = true;
+		if( reset ) neighbors.neighbors.data[ CenterIndex ] = NULL;
+	}
+	if( node!=neighbors.neighbors.data[ CenterIndex ] )
+	{
+		for( int d=node->depth()+1 ; d<=_depth && this->neighbors[d].neighbors.data[ CenterIndex ] ; d++ ) this->neighbors[d].neighbors.data[ CenterIndex ] = NULL;	// invalidate cached centers at deeper levels, stopping at the first level whose center is already NULL
+		neighbors.clear();
+		if( !node->parent ) neighbors.neighbors.data[ CenterIndex ] = node;	// no parent: the window holds only the node itself, at the center slot
+		else _NeighborsLoop< CreateNodes >( UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , getNeighbors< CreateNodes >( node->parent , nodeAllocator , Initializer ).neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) , nodeAllocator , Initializer );	// derive from the parent's window; node - node->parent->children is node's index among its siblings
+	}
+	return neighbors;
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{	// Fills the caller-supplied window `neighbors` (radii _LeftRadii/_RightRadii) around node. The window is always cleared first; a NULL node leaves it all-NULL.
+	static const unsigned int _CenterIndex = WindowIndex< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > , UIntPack< _LeftRadii ... > >::Index;
+	neighbors.clear();
+	if( !node ) return;
+
+	// [WARNING] This estimate of the required radius is somewhat conservative if the radius is odd (depending on where the node is relative to its parent)
+	UIntPack<  LeftRadii ... >  leftRadii;	// radii of the windows cached in this key
+	UIntPack< RightRadii ... > rightRadii;
+	UIntPack< (  _LeftRadii+1 )/2 ... >  pLeftRadii;	// radii used for the parent's window: (r+1)/2 per axis
+	UIntPack< ( _RightRadii+1 )/2 ... > pRightRadii;
+	UIntPack<  _LeftRadii ... >  cLeftRadii;	// radii of the requested (child-level) window
+	UIntPack< _RightRadii ... > cRightRadii;
+
+	// If we are at the root of the tree, we are done
+	if( !node->parent ) neighbors.neighbors.data[ _CenterIndex ] = node;
+	// If we can get the data from the key for the parent node, do that (presumably an element-wise radius comparison on UIntPack -- confirm against its operator<=)
+	else if( pLeftRadii<=leftRadii && pRightRadii<=rightRadii )
+	{
+		getNeighbors< CreateNodes >( node->parent , nodeAllocator , Initializer );	// refreshes the key's cached window for the parent, read back on the next line
+		const Neighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& pNeighbors = this->neighbors[ node->depth()-1 ];
+		_NeighborsLoop< CreateNodes >( leftRadii , rightRadii , cLeftRadii , cRightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) , nodeAllocator , Initializer );
+	}
+	// Otherwise recurse: build the parent's window with the reduced radii in a local, then derive this window from it
+	else
+	{
+		Neighbors< UIntPack< ( ( _LeftRadii+1 )/2  + ( _RightRadii+1 )/2 + 1 ) ... > > pNeighbors;
+		getNeighbors< CreateNodes >( pLeftRadii , pRightRadii , node->parent , pNeighbors , nodeAllocator , Initializer );
+		_NeighborsLoop< CreateNodes >( pLeftRadii , pRightRadii , cLeftRadii , cRightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ), nodeAllocator , Initializer );
+	}
+}
+// Two-window overload: fills pNeighbors with the window of node's parent and neighbors with the window of node, both using radii _LeftRadii/_RightRadii. pNeighbors is left untouched when node is NULL or has no parent.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< bool CreateNodes , unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::NeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* node , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& pNeighbors , Neighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors , Allocator< RegularTreeNode >* nodeAllocator , std::function< void ( RegularTreeNode& ) > Initializer )
+{
+	UIntPack<  _LeftRadii ... >  leftRadii;
+	UIntPack< _RightRadii ... > rightRadii;
+	if( !node || !node->parent ) getNeighbors< CreateNodes >( leftRadii , rightRadii , node , neighbors , nodeAllocator , Initializer );	// NULL node (previously dereferenced) or root: defer to the single-window overload, which clears the window and handles both cases
+	else
+	{
+		getNeighbors< CreateNodes >( leftRadii , rightRadii , node->parent , pNeighbors , nodeAllocator , Initializer );
+		_NeighborsLoop< CreateNodes >( leftRadii , rightRadii , leftRadii , rightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ), nodeAllocator , Initializer );	// node - node->parent->children is node's index among its siblings
+	}
+}
+
+///////////////////////////////////////
+// RegularTreeNode::ConstNeighborKey //
+///////////////////////////////////////
+// Default constructor: the key starts empty, with no per-depth array (neighbors is NULL) and _depth set to -1.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::ConstNeighborKey( void ){ neighbors = NULL; _depth = -1; }
+// Copy constructor: sizes this key for key._depth through set() and then duplicates every per-depth window with memcpy.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::ConstNeighborKey( const ConstNeighborKey& key )
+{
+	neighbors = NULL; _depth = 0;	// start from an empty state so that set() has nothing to free
+	set( key._depth );
+	for( int level=_depth ; level>=0 ; level-- ) memcpy( &neighbors[level] , &key.neighbors[level] , sizeof( ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > > ) );
+}
+// Destructor: releases the per-depth neighbor array (if one was allocated) and leaves the pointer NULL.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::~ConstNeighborKey( void )
+{
+	delete[] neighbors;	// delete[] on a NULL pointer is a no-op, so no guard is needed
+	neighbors = NULL;
+}
+// Copy assignment: re-sizes this key to key._depth and duplicates each per-depth window with memcpy; returns *this.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+typename RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::template ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >& RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::operator = ( const ConstNeighborKey& key )
+{
+	if( this!=&key ){ set( key._depth ); for( int d=0 ; d<=_depth ; d++ ) memcpy( &neighbors[d] , &key.neighbors[d] , sizeof( ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > > ) ); }	// set() frees our array first, so self-assignment must be skipped or the copy would read freed memory
+	return *this;	// previously missing: flowing off the end of a reference-returning function is undefined behavior
+}
+// Re-sizes the key: frees any existing per-depth array, records d as the new depth and,
+// when d is non-negative, allocates d+1 fresh NeighborType entries (a negative d leaves the key without storage).
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::set( int d )
+{
+	delete[] neighbors;	// deleting a NULL pointer is a no-op, so no guard is needed
+	neighbors = NULL;
+	_depth = d;
+	if( d>=0 ) neighbors = new NeighborType[d+1];
+}
+
+// Splits the child index cIdx into one bit per axis and hands the parent/child windows to _Run, starting with a corner index of 0; returns whatever _Run reports.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_NeighborsLoop( UIntPack< _PLeftRadii ... > pLeftRadii , UIntPack< _PRightRadii ... > pRightRadii , UIntPack< _CLeftRadii ... > cLeftRadii , UIntPack< _CRightRadii ... > cRightRadii , ConstWindowSlice< const RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int cIdx )
+{
+	static_assert( Dim==sizeof ... ( _PLeftRadii ) && Dim==sizeof ... ( _PRightRadii ) && Dim==sizeof ... ( _CLeftRadii ) && Dim==sizeof ... ( _CRightRadii ) , "[ERROR] Dimensions don't match" );
+	int cornerBits[Dim];
+	for( unsigned int axis=0 ; axis<Dim ; axis++ ) cornerBits[axis] = ( cIdx>>axis ) & 1;
+	return _Run< UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack< _CRightRadii ... > >::Run( pNeighbors , cNeighbors , cornerBits , 0 );
+}
+// Convenience overload for a mutable parent slice: casts it to the const slice type and forwards to the ConstWindowSlice overload.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int ... _PLeftRadii , unsigned int ... _PRightRadii , unsigned int ... _CLeftRadii , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_NeighborsLoop( UIntPack< _PLeftRadii ... > pLeftRadii , UIntPack< _PRightRadii ... > pRightRadii , UIntPack< _CLeftRadii ... > cLeftRadii , UIntPack< _CRightRadii ... > cRightRadii , WindowSlice< const RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int cIdx )
+{
+	return _NeighborsLoop( pLeftRadii , pRightRadii , cLeftRadii , cRightRadii , ( ConstWindowSlice< const RegularTreeNode* , UIntPack< ( _PLeftRadii + _PRightRadii + 1 ) ... > > )pNeighbors , cNeighbors , cIdx );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int _PLeftRadius , unsigned int ... _PLeftRadii , unsigned int _PRightRadius , unsigned int ... _PRightRadii , unsigned int _CLeftRadius , unsigned int ... _CLeftRadii , unsigned int _CRightRadius , unsigned int ... _CRightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_Run< UIntPack< _PLeftRadius , _PLeftRadii ... > , UIntPack< _PRightRadius , _PRightRadii ... > , UIntPack< _CLeftRadius , _CLeftRadii ... > , UIntPack< _CRightRadius , _CRightRadii ... > >::Run( ConstWindowSlice< const RegularTreeNode* , UIntPack< _PLeftRadius + _PRightRadius + 1 , ( _PLeftRadii + _PRightRadii + 1 ) ... > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< _CLeftRadius + _CRightRadius + 1 , ( _CLeftRadii + _CRightRadii + 1 ) ... > > cNeighbors , int* c , int cornerIndex )
+{	// Recursive step: peels off the leading radius of each pack, walks the child offsets along one axis, and recurses on the remaining axes. Returns the sum of the counts returned by the nested calls.
+	static const int D = sizeof ... ( _PLeftRadii ) + 1;	// number of axes still to process (this one included); this step works on axis Dim-D
+	unsigned int count=0;
+	for( int i=-(int)_CLeftRadius ; i<=(int)_CRightRadius ; i++ )
+	{
+		int _i = (i+c[Dim-D]) + ( _CLeftRadius<<1 ) , pi = ( _i>>1 ) - _CLeftRadius + _PLeftRadius  , ci = i + _CLeftRadius;	// _i: offset i plus this axis' bit c[Dim-D], shifted by 2*_CLeftRadius; pi indexes the parent slice (from _i>>1), ci indexes the child slice
+		count += _Run< UIntPack< _PLeftRadii ... > , UIntPack< _PRightRadii ... > , UIntPack< _CLeftRadii ... > , UIntPack<  _CRightRadii ... > >::Run( pNeighbors[pi] , cNeighbors[ci] , c , cornerIndex | ( ( _i&1)<<(Dim-D) ) );	// the low bit of _i becomes bit Dim-D of the corner index passed down
+	}
+	return count;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int _PLeftRadius , unsigned int _PRightRadius , unsigned int _CLeftRadius , unsigned int _CRightRadius  >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::_Run< UIntPack< _PLeftRadius > , UIntPack< _PRightRadius > , UIntPack< _CLeftRadius > , UIntPack< _CRightRadius > >::Run( ConstWindowSlice< const RegularTreeNode* , UIntPack< _PLeftRadius+_PRightRadius+1 > > pNeighbors , WindowSlice< const RegularTreeNode* , UIntPack< _CLeftRadius+_CRightRadius+1 > > cNeighbors , int* c , int cornerIndex )
+{	// Terminal step (single remaining axis, index Dim-1): writes every entry of the child slice (a child pointer or NULL) and returns how many non-NULL entries were written. The tree is never modified.
+	static const int D = 1;	// one axis left, so Dim-D below is the last axis
+	unsigned int count=0;
+	for( int i=-(int)_CLeftRadius ; i<=(int)_CRightRadius ; i++ )
+	{
+		int _i = (i+c[Dim-D]) + ( _CLeftRadius<<1 ) , pi = ( _i>>1 ) - _CLeftRadius + _PLeftRadius  , ci = i + _CLeftRadius;	// _i: offset i plus the last axis' bit, shifted by 2*_CLeftRadius; pi indexes the parent slice, ci the child slice
+		if( pNeighbors[pi] && pNeighbors[pi]->children ) cNeighbors[ci] = pNeighbors[pi]->children + ( cornerIndex | ( ( _i&1)<<(Dim-1) ) ) , count++;	// only parents that exist and already have children contribute
+		else cNeighbors[ci] = NULL;
+	}
+	return count;
+}
+
+// Fills cNeighbors with the window around child cIdx of the node cached as the center at depth d of this key.
+// Returns 0 without touching cNeighbors when no center node is cached at that depth; otherwise returns the value reported by _NeighborsLoop.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getChildNeighbors( int cIdx , int d , ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& cNeighbors ) const
+{
+	const ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& parentWindow = neighbors[d];
+	// Nothing to derive children from when the cached window has no center node
+	if( parentWindow.neighbors.data[ CenterIndex ]==NULL ) return 0;
+	return _NeighborsLoop( UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , parentWindow.neighbors() , cNeighbors.neighbors() , cIdx );
+}
+
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+typename RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::template ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( const RegularTreeNode* node )
+{	// Returns the window stored at this->neighbors[node->depth()], rebuilding it (recursively, from the parent's window) when node is not its cached center. NOTE(review): node->depth() is not checked against _depth here.
+	ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& neighbors = this->neighbors[ node->depth() ];
+	if( node!=neighbors.neighbors.data[ CenterIndex ] )
+	{
+		for( int d=node->depth()+1 ; d<=_depth && this->neighbors[d].neighbors.data[ CenterIndex ] ; d++ ) this->neighbors[d].neighbors.data[ CenterIndex ] = NULL;	// invalidate cached centers at deeper levels, stopping at the first level whose center is already NULL
+		neighbors.clear();
+		if( !node->parent ) neighbors.neighbors.data[ CenterIndex ] = node;	// no parent: the window holds only the node itself, at the center slot
+		else _NeighborsLoop( UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , UIntPack< LeftRadii ... >() , UIntPack< RightRadii ... >() , getNeighbors( node->parent ).neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) );	// derive from the parent's window; node - node->parent->children is node's index among its siblings
+	}
+	return neighbors;
+}
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType >
+template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* node , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors )
+{	// Fills the caller-supplied window `neighbors` (radii _LeftRadii/_RightRadii) around node. The window is always cleared first; a NULL node leaves it all-NULL.
+	static const unsigned int _CenterIndex = WindowIndex< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > , UIntPack< _LeftRadii ... > >::Index;
+
+	neighbors.clear();
+	if( !node ) return;
+
+	UIntPack<  LeftRadii ... >  leftRadii;	// radii of the windows cached in this key
+	UIntPack< RightRadii ... > rightRadii;
+	UIntPack< (  _LeftRadii+1 )/2 ... >  pLeftRadii;	// radii used for the parent's window: (r+1)/2 per axis
+	UIntPack< ( _RightRadii+1 )/2 ... > pRightRadii;
+	UIntPack<  _LeftRadii ... >  cLeftRadii;	// radii of the requested (child-level) window
+	UIntPack< _RightRadii ... > cRightRadii;
+	// If we are at the root of the tree, we are done
+	if( !node->parent ) neighbors.neighbors.data[ _CenterIndex ] = node;
+	// If we can get the data from the key for the parent node, do that (presumably an element-wise radius comparison on UIntPack -- confirm against its operator<=)
+	else if( pLeftRadii<=leftRadii && pRightRadii<=rightRadii )
+	{
+		getNeighbors( node->parent );	// refreshes the key's cached window for the parent, read back on the next line
+		const ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& pNeighbors = this->neighbors[ node->depth()-1 ];
+		_NeighborsLoop( leftRadii , rightRadii , cLeftRadii , cRightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) );
+	}
+	// Otherwise recurse: build the parent's window with the reduced radii in a local, then derive this window from it
+	else
+	{
+		ConstNeighbors< UIntPack< ( ( _LeftRadii+1 )/2  + ( _RightRadii+1 )/2 + 1 ) ... > > pNeighbors;
+		getNeighbors( pLeftRadii , pRightRadii , node->parent , pNeighbors );
+		_NeighborsLoop( pLeftRadii , pRightRadii , cLeftRadii , cRightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) );
+	}
+	return;
+}
+// Two-window overload: fills pNeighbors with the window of node's parent and neighbors with the window of node, both using radii _LeftRadii/_RightRadii. pNeighbors is left untouched when node is NULL or has no parent.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< unsigned int ... _LeftRadii , unsigned int ... _RightRadii >
+void RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getNeighbors( UIntPack< _LeftRadii ... > , UIntPack< _RightRadii ... > , const RegularTreeNode< Dim , NodeData , DepthAndOffsetType >* node , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& pNeighbors , ConstNeighbors< UIntPack< ( _LeftRadii + _RightRadii + 1 ) ... > >& neighbors )
+{
+	UIntPack<  _LeftRadii ... >  leftRadii;
+	UIntPack< _RightRadii ... > rightRadii;
+	if( !node || !node->parent ) return getNeighbors( leftRadii , rightRadii , node , neighbors );	// NULL node (previously dereferenced) or root: defer to the single-window overload, which clears the window and handles both cases
+	else
+	{
+		getNeighbors( leftRadii , rightRadii , node->parent , pNeighbors );
+		_NeighborsLoop( leftRadii , rightRadii , leftRadii , rightRadii , pNeighbors.neighbors() , neighbors.neighbors() , (int)( node - node->parent->children ) );	// node - node->parent->children is node's index among its siblings
+	}
+}
+// Point-based overload: builds the child index by setting bit `axis` whenever p lies strictly above the cached center node's center along that axis, then defers to the index-based overload.
+template< unsigned int Dim , class NodeData , class DepthAndOffsetType > template< unsigned int ... LeftRadii , unsigned int ... RightRadii >
+template< class Real >
+unsigned int RegularTreeNode< Dim , NodeData , DepthAndOffsetType >::ConstNeighborKey< UIntPack< LeftRadii ... > , UIntPack< RightRadii ... > >::getChildNeighbors( Point< Real , Dim > p , int d , ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& cNeighbors ) const
+{
+	ConstNeighbors< UIntPack< ( LeftRadii + RightRadii + 1 ) ... > >& parentWindow = neighbors[d];
+	// Without a cached center node there is no cell to locate p in
+	if( parentWindow.neighbors.data[ CenterIndex ]==NULL ) return 0;
+	Point< Real , Dim > center;
+	Real width;
+	parentWindow.neighbors.data[ CenterIndex ]->centerAndWidth( center , width );
+	int childIdx = 0;
+	for( int axis=0 ; axis<Dim ; axis++ ) childIdx |= ( p[axis]>center[axis] ) ? (1<<axis) : 0;
+	return getChildNeighbors( childIdx , d , cNeighbors );
+}
diff --git a/Src/SSDRecon.cpp b/Src/SSDRecon.cpp
index 9fe6e7c..e966acd 100644
--- a/Src/SSDRecon.cpp
+++ b/Src/SSDRecon.cpp
@@ -26,295 +26,164 @@ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF S
 DAMAGE.
 */
 
-#undef FAST_COMPILE
-#undef ARRAY_DEBUG
-#define BRUNO_LEVY_FIX
-#define FOR_RELEASE
+#undef SHOW_WARNINGS							// Display compilation warnings
+#undef USE_DOUBLE								// If enabled, double-precision is used
+#undef FAST_COMPILE								// If enabled, only a single version of the reconstruction code is compiled
+#undef ARRAY_DEBUG								// If enabled, array access is tested for validity
+#define DATA_DEGREE 0							// The order of the B-Spline used to splat in data for color interpolation
+												// This can be changed to zero if more interpolatory performance is desired. NOTE(review): the value is already 0 here, so this remark may be stale.
+#define WEIGHT_DEGREE 2							// The order of the B-Spline used to splat in the weights for density estimation
+#define NORMAL_DEGREE 2							// The order of the B-Spline used to splat in the normals for constructing the Laplacian constraints
+#define DEFAULT_FEM_DEGREE 2					// The default finite-element degree
+#define DEFAULT_FEM_BOUNDARY BOUNDARY_NEUMANN	// The default finite-element boundary type
+#define DIMENSION 3								// The dimension of the system
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
-#if defined( _WIN32 ) || defined( _WIN64 )
-#include <Windows.h>
-#include <Psapi.h>
-#endif // _WIN32 || _WIN64
-#include "MyTime.h"
-#include "MarchingCubes.h"
-#include "Octree.h"
-#include "SparseMatrix.h"
+#include "MyMiscellany.h"
 #include "CmdLineParser.h"
 #include "PPolynomial.h"
+#include "FEMTree.h"
 #include "Ply.h"
-#include "MemoryUsage.h"
-#ifdef _OPENMP
-#include "omp.h"
-#endif // _OPENMP
-void DumpOutput( const char* format , ... );
-void DumpOutput2( std::vector< char* >& comments , const char* format , ... );
-#include "MultiGridOctreeData.h"
-
-#define DEFAULT_FULL_DEPTH 5
-
-#define XSTR(x) STR(x)
-#define STR(x) #x
-#if DEFAULT_FULL_DEPTH
-#pragma message ( "[WARNING] Setting default full depth to " XSTR(DEFAULT_FULL_DEPTH) )
-#endif // DEFAULT_FULL_DEPTH
-
-#include <stdarg.h>
-char* outputFile=NULL;
-int echoStdout=0;
-void DumpOutput( const char* format , ... )
-{
-	if( outputFile )
-	{
-		FILE* fp = fopen( outputFile , "a" );
-		va_list args;
-		va_start( args , format );
-		vfprintf( fp , format , args );
-		fclose( fp );
-		va_end( args );
-	}
-	if( echoStdout )
-	{
-		va_list args;
-		va_start( args , format );
-		vprintf( format , args );
-		va_end( args );
-	}
-}
-void DumpOutput2( std::vector< char* >& comments  , const char* format , ... )
-{
-	if( outputFile )
-	{
-		FILE* fp = fopen( outputFile , "a" );
-		va_list args;
-		va_start( args , format );
-		vfprintf( fp , format , args );
-		fclose( fp );
-		va_end( args );
-	}
-	if( echoStdout )
-	{
-		va_list args;
-		va_start( args , format );
-		vprintf( format , args );
-		va_end( args );
-	}
-	comments.push_back( new char[1024] );
-	char* str = comments.back();
-	va_list args;
-	va_start( args , format );
-	vsprintf( str , format , args );
-	va_end( args );
-	if( str[strlen(str)-1]=='\n' ) str[strlen(str)-1] = 0;
-}
+#include "PointStreamData.h"
+#include "Image.h"
+
+MessageWriter messageWriter;
 
+double BaseSSDWeights[] = { 5e+1f , 5e-4f , 1e-5f }; // Three base weights; presumably they pair with the value / gradient / bi-Laplacian weight parameters in that order -- TODO confirm. NOTE(review): the literals are float (f suffix) widened to double.
 
-cmdLineString
+cmdLineParameter< char* >
 	In( "in" ) ,
 	Out( "out" ) ,
 	TempDir( "tempDir" ) ,
-	VoxelGrid( "voxel" ) ,
-	XForm( "xForm" );
+	Grid( "grid" ) ,	
+	Tree( "tree" ) ,
+	Transform( "xForm" );
 
 cmdLineReadable
-#if defined( _WIN32 ) || defined( _WIN64 )
 	Performance( "performance" ) ,
-#endif // _WIN32 || _WIN64
 	ShowResidual( "showResidual" ) ,
 	NoComments( "noComments" ) ,
 	PolygonMesh( "polygonMesh" ) ,
-	Confidence( "confidence" ) ,
-	NormalWeights( "nWeights" ) ,
 	NonManifold( "nonManifold" ) ,
 	ASCII( "ascii" ) ,
 	Density( "density" ) ,
 	NonLinearFit( "nonLinearFit" ) ,
-	PrimalVoxel( "primalVoxel" ) ,
-#ifndef FAST_COMPILE
-	FreeBoundary( "freeBoundary" ) ,
-	Double( "double" ) ,
-#endif // !FAST_COMPILE
+	PrimalGrid( "primalGrid" ) ,
+	ExactInterpolation( "exact" ) ,
+	Normals( "normals" ) ,
+	Colors( "colors" ) ,
+	InCore( "inCore" ) ,
 	Verbose( "verbose" );
 
-cmdLineInt
+cmdLineParameter< int >	// integer-valued command-line options (second constructor argument is the default)
 #ifndef FAST_COMPILE
-	Degree( "degree" , 2 ) ,
+	Degree( "degree" , DEFAULT_FEM_DEGREE ) ,	// b-spline degree (per ShowUsage)
 #endif // !FAST_COMPILE
 	Depth( "depth" , 8 ) ,
-	CGDepth( "cgDepth" , 0 ) ,
 	KernelDepth( "kernelDepth" ) ,
-	AdaptiveExponent( "adaptiveExp" , 1 ) ,
 	Iters( "iters" , 8 ) ,
-	VoxelDepth( "voxelDepth" , -1 ) ,
-	FullDepth( "fullDepth" , DEFAULT_FULL_DEPTH ) ,
-	MaxSolveDepth( "maxSolveDepth" ) ,
+	FullDepth( "fullDepth" , 5 ) ,	// default is now the literal 5 instead of DEFAULT_FULL_DEPTH
+	BaseDepth( "baseDepth" , 5 ) ,	// coarse MG solver depth (per ShowUsage)
+	BaseVCycles( "baseVCycles" , 4 ) ,	// coarse MG solver v-cycles (per ShowUsage)
+#ifndef FAST_COMPILE
+	BType( "bType" , DEFAULT_FEM_BOUNDARY+1 ) ,	// boundary type, 1-based on the command line: ShowUsage lists BoundaryNames[i] as i+1
+#endif // !FAST_COMPILE
+	MaxMemoryGB( "maxMemory" , 0 ) ,	// maximum memory in GB (per ShowUsage)
 	Threads( "threads" , omp_get_num_procs() );
 
-cmdLineFloat
-	Color( "color" , 16.f ) ,
+cmdLineParameter< float >	// float-valued command-line options (second constructor argument is the default)
+	DataX( "data" , 32.f ) ,	// "pull factor" (per ShowUsage); ExtractMesh scales per-node sample data by DataX^depth
 	SamplesPerNode( "samplesPerNode" , 1.5f ) ,
 	Scale( "scale" , 1.1f ) ,
+	Width( "width" , 0.f ) ,	// grid width (per ShowUsage); Execute only uses it when the value is >0
+	Confidence( "confidence" , 0.f ) ,	// normal confidence exponent (per ShowUsage); previously a plain flag
+	ConfidenceBias( "confidenceBias" , 0.f ) ,	// normal confidence bias exponent (per ShowUsage)
 	CGSolverAccuracy( "cgAccuracy" , 1e-3f ) ,
-	LowResIterMultiplier( "iterMultiplier" , 1.5f ) , 
-	ValueWeight   (    "valueWeight" , 4e-0f ) , 
-	GradientWeight( "gradientWeight" , 1e-3f ) ,
-	BiLapWeight   (    "biLapWeight" , 1e-5f );
+	ValueWeight   (    "valueWeight" , 1.f ) ,	// zero-crossing weight (per ShowUsage); default changed from 4e-0f to 1.f
+	GradientWeight( "gradientWeight" , 1.f ) ,	// gradient weight (per ShowUsage); default changed from 1e-3f to 1.f
+	BiLapWeight   (    "biLapWeight" , 1.f );	// bi-laplacian weight (per ShowUsage); default changed from 1e-5f to 1.f
 
 
 cmdLineReadable* params[] =
 {
 #ifndef FAST_COMPILE
-	&Degree , &Double , &FreeBoundary ,
+	&Degree , &BType ,	// both are declared only when FAST_COMPILE is not defined
 #endif // !FAST_COMPILE
-	&In , &Depth , &Out , &XForm ,
-	&Scale , &Verbose , &CGSolverAccuracy , &NoComments , &LowResIterMultiplier ,
-	&KernelDepth , &SamplesPerNode , &Confidence , &NormalWeights , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual , &VoxelDepth ,
-	&BiLapWeight ,
-	&ValueWeight , &GradientWeight , &VoxelGrid , &Threads , &MaxSolveDepth ,
-	&AdaptiveExponent ,
+	&In , &Depth , &Out , &Transform ,
+	&Width ,
+	&Scale , &Verbose , &CGSolverAccuracy , &NoComments ,
+	&KernelDepth , &SamplesPerNode , &Confidence , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual ,
+	&ConfidenceBias ,
+	&ValueWeight , &GradientWeight , &BiLapWeight ,
+	&Grid , &Threads ,
+	&Tree ,
 	&Density ,
 	&FullDepth ,
-	&CGDepth , &Iters ,
-	&Color ,
+	&BaseDepth , &BaseVCycles ,
+	&Iters ,
+	&DataX ,
+	&Colors ,
+	&Normals ,
 	&NonLinearFit ,
-	&PrimalVoxel ,
+	&PrimalGrid ,
 	&TempDir ,
-#if defined( _WIN32 ) || defined( _WIN64 )
+	&ExactInterpolation ,
 	&Performance ,
-#endif // _WIN32 || _WIN64
+	&MaxMemoryGB ,
+	&InCore ,
+	NULL	// sentinel: Execute walks this table with for( i=0 ; params[i] ; i++ ) instead of computing its length
 };
 
-
-void ShowUsage( char* ex )
+void ShowUsage(char* ex)	// Prints the command-line synopsis to stdout; ex is the executable name shown on the "Usage:" line
 {
 	printf( "Usage: %s\n" , ex );
 	printf( "\t --%s <input points>\n" , In.name );
-
 	printf( "\t[--%s <ouput triangle mesh>]\n" , Out.name );
-
-	printf( "\t[--%s <ouput voxel grid>]\n" , VoxelGrid.name );
-
+	printf( "\t[--%s <ouput grid>]\n" , Grid.name );
+	printf( "\t[--%s <ouput fem tree>]\n" , Tree.name );
 #ifndef FAST_COMPILE
 	printf( "\t[--%s <b-spline degree>=%d]\n" , Degree.name , Degree.value );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s]\n" , FreeBoundary.name );
-#endif // !FOR_RELEASE
+	printf( "\t[--%s <boundary type>=%d]\n" , BType.name , BType.value );
+	for( int i=0 ; i<BOUNDARY_COUNT ; i++ ) printf( "\t\t%d] %s\n" , i+1 , BoundaryNames[i] );	// boundary types are listed 1-based, matching the DEFAULT_FEM_BOUNDARY+1 default of BType
 #endif // !FAST_COMPILE
-
 	printf( "\t[--%s <maximum reconstruction depth>=%d]\n" , Depth.name , Depth.value );
-
+	printf( "\t[--%s <grid width>]\n" , Width.name );	// no default is printed for the width
+	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
+	printf( "\t[--%s <coarse MG solver depth>=%d]\n" , BaseDepth.name , BaseDepth.value );
+	printf( "\t[--%s <coarse MG solver v-cycles>=%d]\n" , BaseVCycles.name , BaseVCycles.value );
 	printf( "\t[--%s <scale factor>=%f]\n" , Scale.name , Scale.value );
-
 	printf( "\t[--%s <minimum number of samples per node>=%f]\n" , SamplesPerNode.name, SamplesPerNode.value );
-
 	printf( "\t[--%s <zero-crossing weight>=%.3e]\n" , ValueWeight.name , ValueWeight.value );
-
 	printf( "\t[--%s <gradient weight>=%.3e]\n" , GradientWeight.name , GradientWeight.value );
-
 	printf( "\t[--%s <bi-laplacian weight>=%.3e]\n" , BiLapWeight.name , BiLapWeight.value );
-
-	printf( "\t[--%s]\n" , Confidence.name );
-
-	printf( "\t[--%s]\n" , NormalWeights.name );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <adaptive weighting exponent>=%d]\n", AdaptiveExponent.name , AdaptiveExponent.value );
-#endif // !FOR_RELEASE
-
 	printf( "\t[--%s <iterations>=%d]\n" , Iters.name , Iters.value );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <low-resolution iteration multiplier>=%f]\n" , LowResIterMultiplier.name , LowResIterMultiplier.value );
-#endif // FOR_RELEASE
-
-	printf( "\t[--%s <conjugate-gradients depth>=%d]\n" , CGDepth.name , CGDepth.value );
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s <conjugate-gradients solver accuracy>=%g]\n" , CGSolverAccuracy.name , CGSolverAccuracy.value );
-#endif // !FOR_RELEASE
-
-	printf( "\t[--%s <full depth>=%d]\n" , FullDepth.name , FullDepth.value );
-
-	printf( "\t[--%s <depth at which to extract the voxel grid>=<%s>]\n" , VoxelDepth.name , Depth.name );
-
-	printf( "\t[--%s]\n" , PrimalVoxel.name );
-
-	printf( "\t[--%s <pull factor>]\n" , Color.name );
-
-	printf( "\t[--%s]\n" , Density.name );
-
-	printf( "\t[--%s]\n" , NonLinearFit.name );
-
-	printf( "\t[--%s]\n" , PolygonMesh.name);
-
-#ifndef FOR_RELEASE
-	printf( "\t[--%s]\n" , NonManifold.name );
-#endif // !FOR_RELEASE
-
+	printf( "\t[--%s]\n" , ExactInterpolation.name );
+	printf( "\t[--%s <pull factor>=%f]\n" , DataX.name , DataX.value );
+	printf( "\t[--%s]\n" , Colors.name );
+	printf( "\t[--%s]\n" , Normals.name );
 #ifdef _OPENMP
 	printf( "\t[--%s <num threads>=%d]\n" , Threads.name , Threads.value );
 #endif // _OPENMP
-
-	printf( "\t[--%s]\n" , TempDir.name );
-
-	printf( "\t[--%s]\n" , Verbose.name );
-
-#ifndef FOR_RELEASE
-#if defined( _WIN32 ) || defined( _WIN64 )
+	printf( "\t[--%s <normal confidence exponent>=%f]\n" , Confidence.name , Confidence.value );
+	printf( "\t[--%s <normal confidence bias exponent>=%f]\n" , ConfidenceBias.name , ConfidenceBias.value );
+	printf( "\t[--%s]\n" , NonManifold.name );
+	printf( "\t[--%s]\n" , PolygonMesh.name );
+	printf( "\t[--%s <cg solver accuracy>=%g]\n" , CGSolverAccuracy.name , CGSolverAccuracy.value );
+	printf( "\t[--%s <maximum memory (in GB)>=%d]\n" , MaxMemoryGB.name , MaxMemoryGB.value );
 	printf( "\t[--%s]\n" , Performance.name );
-#endif // _WIN32 || _WIN64
-
-#endif // !FOR_RELEASE
-#ifndef FOR_RELEASE
+	printf( "\t[--%s]\n" , Density.name );
+	printf( "\t[--%s]\n" , NonLinearFit.name );
+	printf( "\t[--%s]\n" , PrimalGrid.name );
 	printf( "\t[--%s]\n" , ASCII.name );
-	
 	printf( "\t[--%s]\n" , NoComments.name );
-#endif // !FOR_RELEASE
-	
-#ifndef FAST_COMPILE
-	printf( "\t[--%s]\n" , Double.name );
-#endif // !FAST_COMPILE
+	printf( "\t[--%s]\n" , TempDir.name );	// NOTE(review): TempDir is a string parameter, but no value placeholder is shown here
+	printf( "\t[--%s]\n" , InCore.name );
+	printf( "\t[--%s]\n" , Verbose.name );
 }
 
-template< class Real >
-struct ColorInfo
-{
-	static Point3D< Real > ReadASCII( FILE* fp )
-	{
-		Point3D< unsigned char > c;
-		if( fscanf( fp , " %c %c %c " , &c[0] , &c[1] , &c[2] )!=3 ) fprintf( stderr , "[ERROR] Failed to read color\n" ) , exit( 0 );
-		return Point3D< Real >( (Real)c[0] , (Real)c[1] , (Real)c[2] );
-	};
-	static bool ValidPlyProperties( const bool* props ){ return ( props[0] || props[3] ) && ( props[1] || props[4] ) && ( props[2] || props[5] ); }
-	const static PlyProperty PlyProperties[];
-};
-template<>
-const PlyProperty ColorInfo< float >::PlyProperties[] =
-{
-	{ "r"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "g"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "b"     , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "red"   , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[0] ) ) , 0 , 0 , 0 , 0 } , 
-	{ "green" , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "blue"  , PLY_UCHAR , PLY_FLOAT , int( offsetof( Point3D< float > , coords[2] ) ) , 0 , 0 , 0 , 0 }
-};
-template<>
-const PlyProperty ColorInfo< double >::PlyProperties[] =
-{
-	{ "r"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[0] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "g"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "b"     , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[2] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "red"   , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[0] ) ) , 0 , 0 , 0 , 0 } , 
-	{ "green" , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[1] ) ) , 0 , 0 , 0 , 0 } ,
-	{ "blue"  , PLY_UCHAR , PLY_DOUBLE , int( offsetof( Point3D< double > , coords[2] ) ) , 0 , 0 , 0 , 0 }
-};
-
-bool ValidPlyColorProperties( const bool* props ){ return ( props[0] || props[3] ) && ( props[1] || props[4] ) && ( props[2] || props[5] ); }
-
 double Weight( double v , double start , double end )
 {
 	v = ( v - start ) / ( end - start );
@@ -332,213 +201,398 @@ double Weight( double v , double start , double end )
 	}
 }
 
-#if defined( _WIN32 ) || defined( _WIN64 )
-double PeakMemoryUsageMB( void )
-{
-	HANDLE h = GetCurrentProcess();
-	PROCESS_MEMORY_COUNTERS pmc;
-	return GetProcessMemoryInfo( h , &pmc , sizeof(pmc) ) ? ( (double)pmc.PeakWorkingSetSize )/(1<<20) : 0;
-}
-#endif // _WIN32 || _WIN64
-
-
-template< class Real >
-struct OctreeProfiler
+template< unsigned int Dim , class Real >	// Reports elapsed time and memory figures for a stage of work on a FEMTree
+struct FEMTreeProfiler
 {
-	Octree< Real >& tree;
+	FEMTree< Dim , Real >& tree;	// bound at construction; the memory queries below go through FEMTree's static functions rather than this reference
 	double t;
 
-	OctreeProfiler( Octree< Real >& t ) : tree(t) { ; }
-	void start( void ){ t = Time() , tree.resetLocalMemoryUsage(); }
+	FEMTreeProfiler( FEMTree< Dim , Real >& t ) : tree(t) { ; }
+	void start( void ){ t = Time() , FEMTree< Dim , Real >::ResetLocalMemoryUsage(); }	// records the start time and resets the local memory counter
 	void print( const char* header ) const
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// NOTE(review): result discarded; presumably updates the counters reported below -- confirm
+		if( header ) printf( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );	// elapsed seconds, then local / max / peak memory
+		else         printf(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );	// same report without the header prefix
 	}
 	void dumpOutput( const char* header ) const
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) DumpOutput( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         DumpOutput(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) DumpOutput( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         DumpOutput(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// NOTE(review): result discarded; presumably updates the counters reported below -- confirm
+		if( header ) messageWriter( "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );	// same report as print(), routed through messageWriter
+		else         messageWriter(    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
 	}
-	void dumpOutput2( std::vector< char* >& comments , const char* header ) const
+	void dumpOutput2( std::vector< std::string >& comments , const char* header ) const	// comments is handed to messageWriter together with the report
 	{
-		tree.memoryUsage();
-#if defined( _WIN32 ) || defined( _WIN64 )
-		if( header ) DumpOutput2( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-		else         DumpOutput2( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() , PeakMemoryUsageMB() );
-#else // !_WIN32 && !_WIN64
-		if( header ) DumpOutput2( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-		else         DumpOutput2( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , tree.localMemoryUsage() , tree.maxMemoryUsage() );
-#endif // _WIN32 || _WIN64
+		FEMTree< Dim , Real >::MemoryUsage();	// NOTE(review): result discarded; presumably updates the counters reported below -- confirm
+		if( header ) messageWriter( comments , "%s %9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" , header , Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
+		else         messageWriter( comments ,    "%9.1f (s), %9.1f (MB) / %9.1f (MB) / %9.1f (MB)\n" ,          Time()-t , FEMTree< Dim , Real >::LocalMemoryUsage() , FEMTree< Dim , Real >::MaxMemoryUsage() , MemoryInfo::PeakMemoryUsageMB() );
 	}
 };
 
-template< class Real >
-XForm4x4< Real > GetPointXForm( OrientedPointStream< Real >& stream , Real scaleFactor )
+template< class Real , unsigned int Dim >	// Builds the transform that normalizes the cube of side scaleFactor*(longest box side) centred on the box [min,max]
+XForm< Real , Dim+1 > GetBoundingBoxXForm( Point< Real , Dim > min , Point< Real , Dim > max , Real scaleFactor )
 {
-	Point3D< Real > min , max;
-	stream.boundingBox( min , max );
-	Point3D< Real > center = ( max + min ) / 2;
-	Real scale = std::max< Real >( max[0]-min[0] , std::max< Real >( max[1]-min[1] , max[2]-min[2] ) );
+	Point< Real , Dim > center = ( max + min ) / 2;
+	Real scale = max[0] - min[0];
+	for( int d=1 ; d<Dim ; d++ ) scale = std::max< Real >( scale , max[d]-min[d] );	// scale = longest side of the bounding box
 	scale *= scaleFactor;
-	for( int i=0 ; i<3 ; i++ ) center[i] -= scale/2;
-	XForm4x4< Real > tXForm = XForm4x4< Real >::Identity() , sXForm = XForm4x4< Real >::Identity();
-	for( int i=0 ; i<3 ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(3,i) = -center[i];
+	for( int i=0 ; i<Dim ; i++ ) center[i] -= scale/2;	// center now holds the lower corner of the cube of side `scale` centred on the box
+	XForm< Real , Dim+1 > tXForm = XForm< Real , Dim+1 >::Identity() , sXForm = XForm< Real , Dim+1 >::Identity();
+	for( int i=0 ; i<Dim ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(Dim,i) = -center[i];	// sXForm: uniform scale by 1/scale; tXForm: translation by -corner, stored in entries (Dim,i)
+	return sXForm * tXForm;	// NOTE(review): presumably applies the translation first and then the scaling, mapping the cube onto [0,1]^Dim -- confirm XForm's product convention
+}
+template< class Real , unsigned int Dim >	// Variant driven by a target cell width: picks the tree depth (returned through `depth`) and builds the matching normalizing transform
+XForm< Real , Dim+1 > GetBoundingBoxXForm( Point< Real , Dim > min , Point< Real , Dim > max , Real width , Real scaleFactor , int& depth )
+{
+	// Get the target resolution (along the largest dimension)
+	Real resolution = ( max[0]-min[0] ) / width;
+	for( int d=1 ; d<Dim ; d++ ) resolution = std::max< Real >( resolution , ( max[d]-min[d] ) / width );
+	resolution *= scaleFactor;
+	// Smallest depth with 2^depth >= resolution; capped so that 1<<depth stays a valid int shift (an unbounded search never terminates for tiny widths)
+	for( depth=0 ; depth<30 && (1<<depth)<resolution ; depth++ );
+	if( (1<<depth)<resolution ) ERROR_OUT( "Width too small for the bounding box: %g" , (double)width );
+	Point< Real , Dim > center = ( max + min ) / 2;
+	Real scale = (1<<depth) * width;	// side length of the cube covered by 2^depth cells of the requested width
+	// Shift `center` to the lower corner of that cube, then build the translation (by -corner) and the scaling (by 1/scale)
+	for( int i=0 ; i<Dim ; i++ ) center[i] -= scale/2;
+	XForm< Real , Dim+1 > tXForm = XForm< Real , Dim+1 >::Identity() , sXForm = XForm< Real , Dim+1 >::Identity();
+	for( int i=0 ; i<Dim ; i++ ) sXForm(i,i) = (Real)(1./scale ) , tXForm(Dim,i) = -center[i];
 	return sXForm * tXForm;
 }
 
-template< class Real , int Degree , BoundaryType BType , class Vertex >
-int _Execute( int argc , char* argv[] )
+template< class Real , unsigned int Dim >	// Width-driven normalization for a point stream: takes the stream's bounding box and defers to GetBoundingBoxXForm (depth is an output)
+XForm< Real , Dim+1 > GetPointXForm( InputPointStream< Real , Dim >& stream , Real width , Real scaleFactor , int& depth )
+{
+	Point< Real , Dim > bBoxMin , bBoxMax;	// filled in by the stream
+	stream.boundingBox( bBoxMin , bBoxMax );
+	return GetBoundingBoxXForm< Real , Dim >( bBoxMin , bBoxMax , width , scaleFactor , depth );
+}
+template< class Real , unsigned int Dim >	// Scale-driven normalization for a point stream: takes the stream's bounding box and defers to GetBoundingBoxXForm
+XForm< Real , Dim+1 > GetPointXForm( InputPointStream< Real , Dim >& stream , Real scaleFactor )
 {
-	typedef typename Octree< Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
-	typedef typename Octree< Real >::template InterpolationInfo< true > InterpolationInfo;
-	typedef OrientedPointStream< Real > PointStream;
-	typedef OrientedPointStreamWithData< Real , Point3D< Real > > PointStreamWithData;
-	typedef TransformedOrientedPointStream< Real > XPointStream;
-	typedef TransformedOrientedPointStreamWithData< Real , Point3D< Real > > XPointStreamWithData;
-	Reset< Real >();
-	int paramNum = sizeof(params)/sizeof(cmdLineReadable*);
-	std::vector< char* > comments;
-
-	if( Verbose.set ) echoStdout=1;
-
-	XForm4x4< Real > xForm , iXForm;
-	if( XForm.set )
+	Point< Real , Dim > min , max;
+	stream.boundingBox( min , max );	// min / max are outputs of the stream's bounding-box query
+	return GetBoundingBoxXForm( min , max , scaleFactor );	// three-argument overload: the scale-only variant
+}
+
+template< unsigned int Dim , typename Real , typename TotalPointSampleData >	// Per-sample constraint functor: weighted target value plus the weighted, negated sample normal
+struct ConstraintDual
+{
+	Real target , vWeight , gWeight;	// prescribed value, value weight, gradient weight
+	ConstraintDual( Real t , Real v , Real g ) : target(t) , vWeight(v) , gWeight(g) { }
+	CumulativeDerivativeValues< Real , Dim , 1 > operator()( const Point< Real , Dim >& p , const TotalPointSampleData& data ) const
+	{
+		Point< Real , Dim > normal = std::get<0>( data.data ).data;	// the first tuple entry of the sample data is read as the normal
+		CumulativeDerivativeValues< Real , Dim , 1 > constraint;
+		constraint[0] = target*vWeight;	// slot 0: value constraint
+		for( int d=1 ; d<=(int)Dim ; d++ ) constraint[d] = -normal[d-1]*gWeight;	// slots 1..Dim: gradient constraint
+		return constraint;
+	}
+};
+template< unsigned int Dim , typename Real , typename TotalPointSampleData >	// Functor that multiplies value / gradient entries by fixed weights
+struct SystemDual
+{
+	CumulativeDerivativeValues< Real , Dim , 1 > weight;	// weight[0]: value weight, weight[1..Dim]: gradient weight
+	SystemDual( Real v , Real g )
+	{
+		weight[0] = v;
+		for( int d=0 ; d<Dim ; d++ ) weight[d+1] = g;	// the same gradient weight for every axis
+	}
+	CumulativeDerivativeValues< Real , Dim , 1 > operator()( Point< Real , Dim > p , const TotalPointSampleData& data , const CumulativeDerivativeValues< Real , Dim , 1 >& dValues ) const	// p and data are not used
 	{
-		FILE* fp = fopen( XForm.value , "r" );
+		return dValues * weight;
+	}
+	CumulativeDerivativeValues< double , Dim , 1 > operator()( Point< Real , Dim > p , const TotalPointSampleData& data , const CumulativeDerivativeValues< double , Dim , 1 >& dValues ) const	// overload for double-valued input; p and data are not used
+	{
+		return dValues * weight;
+	};
+};
+template< unsigned int Dim , class TotalPointSampleData >	// Real==double: the primary template's two operator() overloads would have identical signatures, so this specialization keeps one
+struct SystemDual< Dim , double , TotalPointSampleData >
+{
+	typedef double Real;
+	CumulativeDerivativeValues< Real , Dim , 1 > weight;	// weight[0]: value weight, weight[1..Dim]: gradient weight
+	SystemDual( Real v , Real g ){ weight[0] = v ; for( int d=0 ; d<Dim ; d++ ) weight[d+1] = g; }	// filled per component like the primary template; the former weight( v , g , g , g ) hard-coded Dim==3
+	CumulativeDerivativeValues< Real , Dim , 1 > operator()( Point< Real , Dim > p , const TotalPointSampleData& data , const CumulativeDerivativeValues< Real , Dim , 1 >& dValues ) const	// p and data are not used
+	{
+		return dValues * weight;
+	}
+};
+
+template< typename Vertex , typename Real , unsigned int ... FEMSigs , typename ... SampleData >	// Extracts the iso-surface of `solution` at `isoValue`, logs statistics, and writes the mesh to Out.value as PLY (transformed by iXForm)
+void ExtractMesh( UIntPack< FEMSigs ... > , std::tuple< SampleData ... > , FEMTree< sizeof ... ( FEMSigs ) , Real >& tree , const DenseNodeData< Real , UIntPack< FEMSigs ... > >& solution , Real isoValue , const std::vector< typename FEMTree< sizeof ... ( FEMSigs ) , Real >::PointSample >* samples , std::vector< MultiPointStreamData< Real , PointStreamNormal< Real , DIMENSION > , MultiPointStreamData< Real , SampleData ... > > >* sampleData , const typename FEMTree< sizeof ... ( FEMSigs ) , Real >::template DensityEstimator< WEIGHT_DEGREE >* density , std::function< void ( Vertex& , Point< Real , DIMENSION > , Real , MultiPointStreamData< Real , PointStreamNormal< Real , DIMENSION > , MultiPointStreamData< Real , SampleData ... > > ) > SetVertex , std::vector< std::string > &comments , XForm< Real , sizeof...(FEMSigs)+1 > iXForm )
+{
+	static const int Dim = sizeof ... ( FEMSigs );
+	typedef UIntPack< FEMSigs ... > Sigs;
+	typedef PointStreamNormal< Real , Dim > NormalPointSampleData;
+	typedef MultiPointStreamData< Real , SampleData ... > AdditionalPointSampleData;
+	typedef MultiPointStreamData< Real , NormalPointSampleData , AdditionalPointSampleData > TotalPointSampleData;
+	static const unsigned int DataSig = FEMDegreeAndBType< DATA_DEGREE , BOUNDARY_FREE >::Signature;
+	typedef typename FEMTree< Dim , Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
+	FEMTreeProfiler< Dim , Real > profiler( tree );
+	// Build the prefix used for temporary mesh files: "<temp dir><separator>PR_"; every write below is bounded by the destination size
+	char tempHeader[1024+8];	// room for a full tempPath plus a separator and the "PR_" suffix
+	{
+		char tempPath[1024];
+		tempPath[0] = 0;
+		if( TempDir.set && strlen( TempDir.value )>=sizeof(tempPath) ) ERROR_OUT( "Temporary directory path too long: %s" , TempDir.value );
+		if( TempDir.set ) snprintf( tempPath , sizeof(tempPath) , "%s" , TempDir.value );
+		else SetTempDirectory( tempPath , sizeof(tempPath) );
+		if( strlen(tempPath)==0 ) sprintf( tempPath , ".%c" , FileSeparator );
+		if( tempPath[ strlen( tempPath )-1 ]==FileSeparator ) snprintf( tempHeader , sizeof(tempHeader) , "%sPR_" , tempPath );
+		else                                                  snprintf( tempHeader , sizeof(tempHeader) , "%s%cPR_" , tempPath , FileSeparator );
+	}
+	// --inCore selects the vector-backed mesh container; otherwise the file-backed one is created with the prefix above
+	CoredMeshData< Vertex > *mesh;
+	if( InCore.set ) mesh = new CoredVectorMeshData< Vertex >();
+	else             mesh = new CoredFileMeshData< Vertex >( tempHeader );
+	profiler.start();
+	typename IsoSurfaceExtractor< Dim , Real , Vertex >::IsoStats isoStats;
+	if( sampleData )
+	{
+		SparseNodeData< ProjectiveData< TotalPointSampleData , Real > , IsotropicUIntPack< Dim , DataSig > > _sampleData = tree.template setDataField< DataSig , false >( *samples , *sampleData , (DensityEstimator*)NULL );
+		for( const RegularTreeNode< Dim , FEMTreeNodeData >* n = tree.tree().nextNode() ; n ; n=tree.tree().nextNode( n ) )
+		{
+			ProjectiveData< TotalPointSampleData , Real >* clr = _sampleData( n );
+			if( clr ) (*clr) *= (Real)pow( DataX.value , tree.depth( n ) );	// scale each node's accumulated data by DataX^depth
+		}
+		isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , &_sampleData , solution , isoValue , *mesh , SetVertex , NonLinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+	}
+#if defined( __GNUC__ ) && __GNUC__ < 5
+	#warning "you've got me gcc version<5"
+	else isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , (SparseNodeData< ProjectiveData< TotalPointSampleData , Real > , IsotropicUIntPack< Dim , DataSig > > *)NULL , solution , isoValue , *mesh , SetVertex , NonLinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+#else // !__GNUC__ || __GNUC__ >=5
+	else isoStats = IsoSurfaceExtractor< Dim , Real , Vertex >::template Extract< TotalPointSampleData >( Sigs() , UIntPack< WEIGHT_DEGREE >() , UIntPack< DataSig >() , tree , density , NULL , solution , isoValue , *mesh , SetVertex , NonLinearFit.set , !NonManifold.set , PolygonMesh.set , false );
+#endif // __GNUC__ && __GNUC__ < 5
+	messageWriter( "Vertices / Polygons: %llu / %llu\n" , (unsigned long long)( mesh->outOfCorePointCount()+mesh->inCorePoints.size() ) , (unsigned long long)mesh->polygonCount() );	// the sum involves a size_t, so %d was the wrong conversion
+	std::string isoStatsString = isoStats.toString() + std::string( "\n" );
+	messageWriter( "%s" , isoStatsString.c_str() );	// never pass runtime text as the format string
+	if( PolygonMesh.set ) profiler.dumpOutput2( comments , "#         Got polygons:" );
+	else                  profiler.dumpOutput2( comments , "#        Got triangles:" );
+	// Write the mesh; the accumulated comments are passed along unless --noComments was given
+	std::vector< std::string > noComments;
+	if( !PlyWritePolygons< Vertex , Real , Dim >( Out.value , mesh , ASCII.set ? PLY_ASCII : PLY_BINARY_NATIVE , NoComments.set ? noComments : comments , iXForm ) )
+		ERROR_OUT( "Could not write mesh to: %s" , Out.value );
+	delete mesh;
+}
+
+template< typename Real , unsigned int Dim >	// Writes res^Dim grid samples to fileName: a grayscale image for 2D grids with an image extension, a raw float dump otherwise
+void WriteGrid( ConstPointer( Real ) values , int res , const char *fileName )
+{
+	int resolution = 1;
+	for( int d=0 ; d<Dim ; d++ ) resolution *= res;	// total sample count, res^Dim
+	// The extension string is released with delete[] at the end of this function
+	char *ext = GetFileExtension( fileName );
+	// Image output applies only to 2D grids whose extension the image writer accepts
+	if( Dim==2 && ImageWriter::ValidExtension( ext ) )
+	{
+		Real avg = 0;
+#pragma omp parallel for reduction( + : avg )
+		for( int i=0 ; i<resolution ; i++ ) avg += values[i];
+		avg /= (Real)resolution;
+		// Standard deviation of the samples around the mean
+		Real std = 0;
+#pragma omp parallel for reduction( + : std )
+		for( int i=0 ; i<resolution ; i++ ) std += ( values[i] - avg ) * ( values[i] - avg );
+		std = (Real)sqrt( std / resolution );
+		// Samples within two standard deviations of the mean map linearly onto [0,255]; the rest are clamped
+		if( Verbose.set ) printf( "Grid to image: [%.2f,%.2f] -> [0,255]\n" , avg - 2*std , avg + 2*std );
+		// Three identical channels per pixel
+		unsigned char *pixels = new unsigned char[ resolution*3 ];
+#pragma omp parallel for
+		for( int i=0 ; i<resolution ; i++ )
+		{
+			Real v = (Real)std::min< Real >( (Real)1. , std::max< Real >( (Real)-1. , ( values[i] - avg ) / (2*std ) ) );
+			v = (Real)( ( v + 1. ) / 2. * 256. );
+			unsigned char color = (unsigned char )std::min< Real >( (Real)255. , std::max< Real >( (Real)0. , v ) );
+			for( int c=0 ; c<3 ; c++ ) pixels[i*3+c ] = color;
+		}
+		ImageWriter::Write( fileName , pixels , res , res , 3 );
+		delete[] pixels;
+	}
+	else
+	{
+		// Raw layout: the int `res` followed by res^Dim floats; short writes are reported instead of being ignored
+		FILE *fp = fopen( fileName , "wb" );
+		if( !fp ) ERROR_OUT( "Failed to open grid file for writing: %s" , fileName );
+		else
+		{
+			bool ok = fwrite( &res , sizeof(int) , 1 , fp )==1;
+			if( typeid(Real)==typeid(float) ) ok = ok && fwrite( values , sizeof(float) , resolution , fp )==(size_t)resolution;
+			else
+			{
+				float *fValues = new float[resolution];	// convert to float so the file layout does not depend on Real
+				for( int i=0 ; i<resolution ; i++ ) fValues[i] = float( values[i] );
+				ok = ok && fwrite( fValues , sizeof(float) , resolution , fp )==(size_t)resolution;
+				delete[] fValues;
+			}
+			if( fclose( fp ) || !ok ) ERROR_OUT( "Failed to write grid file: %s" , fileName );	// fclose is always called; it also reports flush failures
+		}
+	}
+	delete[] ext;
+}
+
+
+template< class Real , typename ... SampleData , unsigned int ... FEMSigs >
+void Execute( int argc , char* argv[] , UIntPack< FEMSigs ... > )
+{
+	static const int Dim = sizeof ... ( FEMSigs );
+	typedef UIntPack< FEMSigs ... > Sigs;
+	typedef UIntPack< FEMSignature< FEMSigs >::Degree ... > Degrees;
+	typedef UIntPack< FEMDegreeAndBType< NORMAL_DEGREE , DerivativeBoundary< FEMSignature< FEMSigs >::BType , 1 >::BType >::Signature ... > NormalSigs;
+	static const unsigned int DataSig = FEMDegreeAndBType< DATA_DEGREE , BOUNDARY_FREE >::Signature;
+	typedef typename FEMTree< Dim , Real >::template DensityEstimator< WEIGHT_DEGREE > DensityEstimator;
+	typedef typename FEMTree< Dim , Real >::template InterpolationInfo< Real , 1 > InterpolationInfo;
+	typedef PointStreamNormal< Real , Dim > NormalPointSampleData;
+	typedef MultiPointStreamData< Real , SampleData ... > AdditionalPointSampleData;
+	typedef MultiPointStreamData< Real , NormalPointSampleData , AdditionalPointSampleData > TotalPointSampleData;
+	typedef InputPointStreamWithData< Real , Dim , TotalPointSampleData > InputPointStream;
+	typedef TransformedInputPointStreamWithData< Real , Dim , TotalPointSampleData > XInputPointStream;
+	std::vector< std::string > comments;
+	messageWriter( comments , "************************************************\n" );
+	messageWriter( comments , "************************************************\n" );
+	messageWriter( comments , "** Running SSD Reconstruction (Version %s) **\n" , VERSION );
+	messageWriter( comments , "************************************************\n" );
+	messageWriter( comments , "************************************************\n" );
+
+	XForm< Real , Dim+1 > xForm , iXForm;
+	if( Transform.set )
+	{
+		FILE* fp = fopen( Transform.value , "r" );
 		if( !fp )
 		{
-			fprintf( stderr , "[WARNING] Could not read x-form from: %s\n" , XForm.value );
-			xForm = XForm4x4< Real >::Identity();
+			WARN( "Could not read x-form from: %s" , Transform.value );
+			xForm = XForm< Real , Dim+1 >::Identity();
 		}
 		else
 		{
-			for( int i=0 ; i<4 ; i++ ) for( int j=0 ; j<4 ; j++ )
+			for( int i=0 ; i<Dim+1 ; i++ ) for( int j=0 ; j<Dim+1 ; j++ )
 			{
 				float f;
-				if( fscanf( fp , " %f " , &f )!=1 ) fprintf( stderr , "[ERROR] Execute: Failed to read xform\n" ) , exit( 0 );
+				if( fscanf( fp , " %f " , &f )!=1 ) ERROR_OUT( "Failed to read xform" );
 				xForm(i,j) = (Real)f;
 			}
 			fclose( fp );
 		}
 	}
-	else xForm = XForm4x4< Real >::Identity();
-	
-	DumpOutput2( comments , "Running SSD Reconstruction (Version 9.011)\n" );
+	else xForm = XForm< Real , Dim+1 >::Identity();
+
 	char str[1024];
-	for( int i=0 ; i<paramNum ; i++ )
+	for( int i=0 ; params[i] ; i++ )
 		if( params[i]->set )
 		{
 			params[i]->writeValue( str );
-			if( strlen( str ) ) DumpOutput2( comments , "\t--%s %s\n" , params[i]->name , str );
-			else                DumpOutput2( comments , "\t--%s\n" , params[i]->name );
+			if( strlen( str ) ) messageWriter( comments , "\t--%s %s\n" , params[i]->name , str );
+			else                messageWriter( comments , "\t--%s\n" , params[i]->name );
 		}
 
 	double startTime = Time();
 	Real isoValue = 0;
 
-	Octree< Real > tree;
-	OctreeProfiler< Real > profiler( tree );
-	tree.threads = Threads.value;
-	if( !In.set )
-	{
-		ShowUsage( argv[0] );
-		return 0;
-	}
-	if( !MaxSolveDepth.set ) MaxSolveDepth.value = Depth.value;
-	
-	OctNode< TreeNodeData >::SetAllocator( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	FEMTree< Dim , Real > tree( MEMORY_ALLOCATOR_BLOCK_SIZE );
+	FEMTreeProfiler< Dim , Real > profiler( tree );
 
-	int kernelDepth = KernelDepth.set ? KernelDepth.value : Depth.value-2;
-	if( kernelDepth>Depth.value )
+	if( Depth.set && Width.value>0 )
 	{
-		fprintf( stderr,"[WARNING] %s can't be greater than %s: %d <= %d\n" , KernelDepth.name , Depth.name , KernelDepth.value , Depth.value );
-		kernelDepth = Depth.value;
+		WARN( "Both --%s and --%s set, ignoring --%s" , Depth.name , Width.name , Width.name );
+		Width.value = 0;
 	}
 
 	int pointCount;
 
 	Real pointWeightSum;
-	std::vector< typename Octree< Real >::PointSample >* samples = new std::vector< typename Octree< Real >::PointSample >();
-	std::vector< ProjectiveData< Point3D< Real > , Real > >* sampleData = NULL;
+	std::vector< typename FEMTree< Dim , Real >::PointSample >* samples = new std::vector< typename FEMTree< Dim , Real >::PointSample >();
+	std::vector< TotalPointSampleData >* sampleData = NULL;
 	DensityEstimator* density = NULL;
-	SparseNodeData< Point3D< Real > , NORMAL_DEGREE >* normalInfo = NULL;
+	SparseNodeData< Point< Real , Dim > , NormalSigs >* normalInfo = NULL;
 	Real targetValue = (Real)0.;
 
 	// Read in the samples (and color data)
 	{
 		profiler.start();
-		PointStream* pointStream;
+		InputPointStream* pointStream;
 		char* ext = GetFileExtension( In.value );
-		if( Color.set && Color.value>0 )
+		sampleData = new std::vector< TotalPointSampleData >();
+		std::vector< std::pair< Point< Real , Dim > , TotalPointSampleData > > inCorePoints;
+		if( InCore.set )
 		{
-			sampleData = new std::vector< ProjectiveData< Point3D< Real > , Real > >();
-			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryOrientedPointStreamWithData< Real , Point3D< Real > , float , Point3D< unsigned char > >( In.value );
-			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYOrientedPointStreamWithData< Real , Point3D< Real > >( In.value , ColorInfo< Real >::PlyProperties , 6 , ColorInfo< Real >::ValidPlyProperties );
-			else                                    pointStream = new  ASCIIOrientedPointStreamWithData< Real , Point3D< Real > >( In.value , ColorInfo< Real >::ReadASCII );
+			InputPointStream *_pointStream;
+			if     ( !strcasecmp( ext , "bnpts" ) ) _pointStream = new BinaryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadBinary );
+			else if( !strcasecmp( ext , "ply"   ) ) _pointStream = new    PLYInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::PlyReadProperties() , TotalPointSampleData::PlyReadNum , TotalPointSampleData::ValidPlyReadProperties );
+			else                                    _pointStream = new  ASCIIInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadASCII );
+			Point< Real , Dim > p;
+			TotalPointSampleData d;
+			while( _pointStream->nextPoint( p , d ) ) inCorePoints.push_back( std::pair< Point< Real , Dim > , TotalPointSampleData >( p , d ) );
+			delete _pointStream;
+
+			pointStream = new MemoryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( inCorePoints.size() , &inCorePoints[0] );
 		}
 		else
 		{
-			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryOrientedPointStream< Real , float >( In.value );
-			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYOrientedPointStream< Real >( In.value );
-			else                                    pointStream = new  ASCIIOrientedPointStream< Real >( In.value );
+			if     ( !strcasecmp( ext , "bnpts" ) ) pointStream = new BinaryInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadBinary );
+			else if( !strcasecmp( ext , "ply"   ) ) pointStream = new    PLYInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::PlyReadProperties() , TotalPointSampleData::PlyReadNum , TotalPointSampleData::ValidPlyReadProperties );
+			else                                    pointStream = new  ASCIIInputPointStreamWithData< Real , Dim , TotalPointSampleData >( In.value , TotalPointSampleData::ReadASCII );
 		}
 		delete[] ext;
-		XPointStream _pointStream( xForm , *pointStream );
-		xForm = GetPointXForm( _pointStream , (Real)Scale.value ) * xForm;
-		if( sampleData )
+		typename TotalPointSampleData::Transform _xForm( xForm );
+		XInputPointStream _pointStream( [&]( Point< Real , Dim >& p , TotalPointSampleData& d ){ p = xForm*p , d = _xForm(d); } , *pointStream );
+		if( Width.value>0 ) xForm = GetPointXForm< Real , Dim >( _pointStream , Width.value , (Real)( Scale.value>0 ? Scale.value : 1. ) , Depth.value ) * xForm;
+		else                xForm = Scale.value>0 ? GetPointXForm< Real , Dim >( _pointStream , (Real)Scale.value ) * xForm : xForm;
 		{
-			XPointStreamWithData _pointStream( xForm , ( PointStreamWithData& )*pointStream );
-			pointCount = tree.template init< Point3D< Real > >( _pointStream , Depth.value , Confidence.set , *samples , sampleData );
-		}
-		else
-		{
-			XPointStream _pointStream( xForm , *pointStream );
-			pointCount = tree.template init< Point3D< Real > >( _pointStream , Depth.value , Confidence.set , *samples , sampleData );
+			typename TotalPointSampleData::Transform _xForm( xForm );
+			XInputPointStream _pointStream( [&]( Point< Real , Dim >& p , TotalPointSampleData& d ){ p = xForm*p , d = _xForm(d); } , *pointStream );
+			auto ProcessDataWithConfidence = [&]( const Point< Real , Dim >& p , TotalPointSampleData& d )
+			{
+				Real l = (Real)Length( std::get< 0 >( d.data ).data );
+				if( !l || l!=l ) return (Real)-1.;
+				return (Real)pow( l , Confidence.value );
+			};
+			auto ProcessData = []( const Point< Real , Dim >& p , TotalPointSampleData& d )
+			{
+				Real l = (Real)Length( std::get< 0 >( d.data ).data );
+				if( !l || l!=l ) return (Real)-1.;
+				std::get< 0 >( d.data ).data /= l;
+				return (Real)1.;
+			};
+			if( Confidence.value>0 ) pointCount = FEMTreeInitializer< Dim , Real >::template Initialize< TotalPointSampleData >( tree.spaceRoot() , _pointStream , Depth.value , *samples , *sampleData , true , tree.nodeAllocator , tree.initializer() , ProcessDataWithConfidence );
+			else                     pointCount = FEMTreeInitializer< Dim , Real >::template Initialize< TotalPointSampleData >( tree.spaceRoot() , _pointStream , Depth.value , *samples , *sampleData , true , tree.nodeAllocator , tree.initializer() , ProcessData );
 		}
 		iXForm = xForm.inverse();
 		delete pointStream;
-#pragma omp parallel for num_threads( Threads.value )
-		for( int i=0 ; i<(int)samples->size() ; i++ ) (*samples)[i].sample.data.n *= (Real)-1;
 
-		DumpOutput( "Input Points / Samples: %d / %d\n" , pointCount , samples->size() );
+		messageWriter( "Input Points / Samples: %d / %d\n" , pointCount , samples->size() );
 		profiler.dumpOutput2( comments , "# Read input into tree:" );
 	}
+	int kernelDepth = KernelDepth.set ? KernelDepth.value : Depth.value-2;
+	if( kernelDepth>Depth.value )
+	{
+		WARN( "%s can't be greater than %s: %d <= %d" , KernelDepth.name , Depth.name , KernelDepth.value , Depth.value );
+		kernelDepth = Depth.value;
+	}
 
-	DenseNodeData< Real , Degree > solution;
-	// Solve
+	DenseNodeData< Real , Sigs > solution;
 	{
-		DenseNodeData< Real , Degree > constraints;
+		DenseNodeData< Real , Sigs > constraints;
 		InterpolationInfo* iInfo = NULL;
-		int solveDepth = MaxSolveDepth.value;
+		int solveDepth = Depth.value;
 
 		tree.resetNodeIndices();
 
 		// Get the kernel density estimator
 		{
 			profiler.start();
-			density = tree.template setDensityEstimator< WEIGHT_DEGREE >( *samples , kernelDepth , SamplesPerNode.value );
+			density = tree.template setDensityEstimator< WEIGHT_DEGREE >( *samples , kernelDepth , SamplesPerNode.value , 1 );
 			profiler.dumpOutput2( comments , "#   Got kernel density:" );
 		}
 
 		// Transform the Hermite samples into a vector field
 		{
 			profiler.start();
-			normalInfo = new SparseNodeData< Point3D< Real > , NORMAL_DEGREE >();
-			*normalInfo = tree.template setNormalField< NORMAL_DEGREE >( *samples , *density , pointWeightSum , BType==BOUNDARY_NEUMANN );
+			normalInfo = new SparseNodeData< Point< Real , Dim > , NormalSigs >();
+			if( ConfidenceBias.value>0 ) *normalInfo = tree.setNormalField( NormalSigs() , *samples , *sampleData , density , pointWeightSum , [&]( Real conf ){ return (Real)( log( conf ) * ConfidenceBias.value / log( 1<<(Dim-1) ) ); } );
+			else                         *normalInfo = tree.setNormalField( NormalSigs() , *samples , *sampleData , density , pointWeightSum );
 			profiler.dumpOutput2( comments , "#     Got normal field:" );
+			messageWriter( "Point weight / Estimated Area: %g / %g\n" , pointWeightSum , pointCount*pointWeightSum );
 		}
 
 		if( !Density.set ) delete density , density = NULL;
@@ -546,241 +600,222 @@ int _Execute( int argc , char* argv[] )
 		// Trim the tree and prepare for multigrid
 		{
 			profiler.start();
-			std::vector< int > indexMap;
-
-			constexpr int MAX_DEGREE = NORMAL_DEGREE > Degree ? NORMAL_DEGREE : Degree;
-			tree.template inalizeForBroodedMultigrid< MAX_DEGREE , Degree , BType >( FullDepth.value , typename Octree< Real >::template HasNormalDataFunctor< NORMAL_DEGREE >( *normalInfo ) , &indexMap );
-
-			if( normalInfo ) normalInfo->remapIndices( indexMap );
-			if( density ) density->remapIndices( indexMap );
+			constexpr int MAX_DEGREE = NORMAL_DEGREE > Degrees::Max() ? NORMAL_DEGREE : Degrees::Max();
+			tree.template finalizeForMultigrid< MAX_DEGREE >( FullDepth.value , typename FEMTree< Dim , Real >::template HasNormalDataFunctor< NormalSigs >( *normalInfo ) , normalInfo , density );
 			profiler.dumpOutput2( comments , "#       Finalized tree:" );
 		}
 
-		// Free up the normal info
+		// Free up the normal info [If we don't need it for subsequent iterations.]
 		if( normalInfo ) delete normalInfo , normalInfo = NULL;
 
 		// Add the interpolation constraints
 		if( ValueWeight.value>0 || GradientWeight.value>0 )
 		{
 			profiler.start();
-			iInfo = new InterpolationInfo( tree , *samples , targetValue , AdaptiveExponent.value , (Real)ValueWeight.value * pointWeightSum , (Real)GradientWeight.value * pointWeightSum );
-			constraints = tree.template initDenseNodeData< Degree >( );
-			tree.template addInterpolationConstraints< Degree , BType >( *iInfo , constraints , solveDepth );
+			if( ExactInterpolation.set ) iInfo = FEMTree< Dim , Real >::template       InitializeExactPointAndDataInterpolationInfo< Real , TotalPointSampleData , 1 >( tree , *samples , GetPointer( *sampleData ) , ConstraintDual< Dim , Real , TotalPointSampleData >( targetValue , (Real)ValueWeight.value * pointWeightSum , (Real)GradientWeight.value * pointWeightSum  ) , SystemDual< Dim , Real , TotalPointSampleData >( (Real)ValueWeight.value * pointWeightSum , (Real)GradientWeight.value * pointWeightSum ) , true , false );
+			else                         iInfo = FEMTree< Dim , Real >::template InitializeApproximatePointAndDataInterpolationInfo< Real , TotalPointSampleData , 1 >( tree , *samples , GetPointer( *sampleData ) , ConstraintDual< Dim , Real , TotalPointSampleData >( targetValue , (Real)ValueWeight.value * pointWeightSum , (Real)GradientWeight.value * pointWeightSum  ) , SystemDual< Dim , Real , TotalPointSampleData >( (Real)ValueWeight.value * pointWeightSum , (Real)GradientWeight.value * pointWeightSum ) , true , 1 );
+			constraints = tree.initDenseNodeData( Sigs() );
+			tree.addInterpolationConstraints( constraints , solveDepth , *iInfo );
 			profiler.dumpOutput2( comments , "#Set point constraints:" );
+			if( DataX.value<=0 || ( !Colors.set && !Normals.set ) ) delete sampleData , sampleData = NULL;
 		}
 
-		DumpOutput( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
-		DumpOutput( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage())/(1<<20) );
+		messageWriter( "Leaf Nodes / Active Nodes / Ghost Nodes: %d / %d / %d\n" , (int)tree.leaves() , (int)tree.nodes() , (int)tree.ghostNodes() );
+		messageWriter( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage())/(1<<20) );
 
 		// Solve the linear system
 		{
 			profiler.start();
-			typename Octree< Real >::SolverInfo solverInfo;
-			solverInfo.cgDepth = CGDepth.value , solverInfo.iters = Iters.value , solverInfo.cgAccuracy = CGSolverAccuracy.value , solverInfo.verbose = Verbose.set , solverInfo.showResidual = ShowResidual.set , solverInfo.lowResIterMultiplier = std::max< double >( 1. , LowResIterMultiplier.value );
-			solution = tree.template solveSystem< Degree , BType >( FEMSystemFunctor< Degree , BType >( 0 , 0 , BiLapWeight.value ) , iInfo , constraints , solveDepth , solverInfo );
+			typename FEMTree< Dim , Real >::SolverInfo sInfo;
+			sInfo.cgDepth = 0 , sInfo.cascadic = true , sInfo.vCycles = 1 , sInfo.iters = Iters.value , sInfo.cgAccuracy = CGSolverAccuracy.value , sInfo.verbose = Verbose.set , sInfo.showResidual = ShowResidual.set , sInfo.showGlobalResidual = SHOW_GLOBAL_RESIDUAL_NONE , sInfo.sliceBlockSize = 1;
+			sInfo.baseDepth = BaseDepth.value , sInfo.baseVCycles = BaseVCycles.value;
+			typename FEMIntegrator::template System< Sigs , IsotropicUIntPack< Dim , 2 > > F( { 0. , 0. , (double)BiLapWeight.value } );
+			solution = tree.solveSystem( Sigs() , F , constraints , solveDepth , sInfo , iInfo );
 			profiler.dumpOutput2( comments , "# Linear system solved:" );
-			DumpOutput( "Memory Usage: %.3f MB\n" , float( MemoryInfo::Usage() )/(1<<20) );
 			if( iInfo ) delete iInfo , iInfo = NULL;
 		}
 	}
 
-	char tempHeader[1024];
-	{
-#if defined( _WIN32 ) || defined( _WIN64 )
-		const char FileSeparator = '\\';
-#else // !_WIN
-		const char FileSeparator = '/';
-#endif // _WIN
-		char tempPath[1024];
-		tempPath[0] = 0;
-		if( TempDir.set ) strcpy( tempPath , TempDir.value );
-		else
-		{
-#if defined( _WIN32 ) || defined( _WIN64 )
-			GetTempPath( sizeof(tempPath) , tempPath );
-#else // !_WIN
-			if( std::getenv( "TMPDIR" ) ) strcpy( tempPath , std::getenv( "TMPDIR" ) );
-#endif // _WIN
-		}
-		if( strlen(tempPath)==0 ) sprintf( tempPath , ".%c" , FileSeparator );
-		if( tempPath[ strlen( tempPath )-1 ]==FileSeparator ) sprintf( tempHeader , "%sPR_" , tempPath );
-		else                                                  sprintf( tempHeader , "%s%cPR_" , tempPath , FileSeparator );
-	}
-	CoredFileMeshData< Vertex > mesh( tempHeader );
-
 	{
 		profiler.start();
 		double valueSum = 0 , weightSum = 0;
-		typename Octree< Real >::template MultiThreadedEvaluator< Degree , BType > evaluator( &tree , solution , Threads.value );
-#pragma omp parallel for num_threads( Threads.value ) reduction( + : valueSum , weightSum )
+		typename FEMTree< Dim , Real >::template MultiThreadedEvaluator< Sigs , 0 > evaluator( &tree , solution );
+#pragma omp parallel for reduction( + : valueSum , weightSum )
 		for( int j=0 ; j<samples->size() ; j++ )
 		{
-			ProjectiveData< OrientedPoint3D< Real > , Real >& sample = (*samples)[j].sample;
+			ProjectiveData< Point< Real , Dim > , Real >& sample = (*samples)[j].sample;
 			Real w = sample.weight;
-			if( w>0 ) weightSum += w , valueSum += evaluator.value( sample.data.p / sample.weight , omp_get_thread_num() , (*samples)[j].node ) * w;
+			if( w>0 ) weightSum += w , valueSum += evaluator.values( sample.data / sample.weight , omp_get_thread_num() , (*samples)[j].node )[0] * w;
 		}
 		isoValue = (Real)( valueSum / weightSum );
-		if( !( Color.set && Color.value>0 ) && samples ) delete samples , samples = NULL;
+		if( DataX.value<=0 || ( !Colors.set && !Normals.set ) ) delete samples , samples = NULL;
 		profiler.dumpOutput( "Got average:" );
-		DumpOutput( "Iso-Value: %e\n" , isoValue );
+		messageWriter( "Iso-Value: %e = %g / %g\n" , isoValue , valueSum , weightSum );
+	}
+	if( Tree.set )
+	{
+		FILE* fp = fopen( Tree.value , "wb" );
+		if( !fp ) ERROR_OUT( "Failed to open file for writing: %s" , Tree.value );
+		FEMTree< Dim , Real >::WriteParameter( fp );
+		DenseNodeData< Real , Sigs >::WriteSignatures( fp );
+		tree.write( fp );
+		solution.write( fp );
+		fclose( fp );
 	}
 
-	if( VoxelGrid.set )
+	if( Grid.set )
 	{
+		int res = 0;
 		profiler.start();
-		FILE* fp = fopen( VoxelGrid.value , "wb" );
-		if( !fp ) fprintf( stderr , "Failed to open voxel file for writing: %s\n" , VoxelGrid.value );
-		else
+		Pointer( Real ) values = tree.template regularGridEvaluate< true >( solution , res , -1 , PrimalGrid.set );
+		int resolution = 1;
+		for( int d=0 ; d<Dim ; d++ ) resolution *= res;
+#pragma omp parallel for
+		for( int i=0 ; i<resolution ; i++ ) values[i] -= isoValue;
+		profiler.dumpOutput( "Got grid:" );
+		WriteGrid< Real , DIMENSION >( values , res , Grid.value );
+		DeletePointer( values );
+		if( Verbose.set )
 		{
-			int res = 0;
-			Pointer( Real ) values = tree.template voxelEvaluate< Real , Degree , BType >( solution , res , isoValue , VoxelDepth.value , PrimalVoxel.set );
-			fwrite( &res , sizeof(int) , 1 , fp );
-			if( sizeof(Real)==sizeof(float) ) fwrite( values , sizeof(float) , res*res*res , fp );
-			else
+			printf( "Transform:\n" );
+			for( int i=0 ; i<Dim+1 ; i++ )
 			{
-				float *fValues = new float[res*res*res];
-				for( int i=0 ; i<res*res*res ; i++ ) fValues[i] = float( values[i] );
-				fwrite( fValues , sizeof(float) , res*res*res , fp );
-				delete[] fValues;
+				printf( "\t" );
+				for( int j=0 ; j<Dim+1 ; j++ ) printf( " %f" , iXForm(j,i) );
+				printf( "\n" );
 			}
-			fclose( fp );
-			DeletePointer( values );
 		}
-		profiler.dumpOutput( "Got voxel grid:" );
 	}
 
 	if( Out.set )
 	{
-		profiler.start();
-		SparseNodeData< ProjectiveData< Point3D< Real > , Real > , DATA_DEGREE >* colorData = NULL;
-		if( sampleData )
+		if( Normals.set )
 		{
-			colorData = new SparseNodeData< ProjectiveData< Point3D< Real > , Real > , DATA_DEGREE >();
-			*colorData = tree.template setDataField< DATA_DEGREE , false >( *samples , *sampleData , (DensityEstimator*)NULL );
-			delete sampleData , sampleData = NULL;
-			for( const OctNode< TreeNodeData >* n = tree.tree().nextNode() ; n ; n=tree.tree().nextNode( n ) )
+			if( Density.set )
 			{
-				ProjectiveData< Point3D< Real > , Real >* clr = (*colorData)( n );
-				if( clr ) (*clr) *= (Real)pow( Color.value , tree.depth( n ) );
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamNormal< Real , Dim > , PointStreamValue< Real > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 0 >( d.data ) , std::get< 1 >( v.data.data ).data = w , std::get< 2 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
+			else
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamNormal< Real , Dim > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 0 >( d.data ) , std::get< 1 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
 			}
-		}
-		tree.template getMCIsoSurface< Degree , BType , WEIGHT_DEGREE , DATA_DEGREE >( density , colorData , solution , isoValue , mesh , NonLinearFit.set , !NonManifold.set , PolygonMesh.set );
-		DumpOutput( "Vertices / Polygons: %d / %d\n" , mesh.outOfCorePointCount()+mesh.inCorePoints.size() , mesh.polygonCount() );
-		if( PolygonMesh.set ) profiler.dumpOutput2( comments , "#         Got polygons:" );
-		else                  profiler.dumpOutput2( comments , "#        Got triangles:" );
-
-		if( colorData ) delete colorData , colorData = NULL;
-
-		if( NoComments.set )
-		{
-			if( ASCII.set ) PlyWritePolygons( Out.value , &mesh , PLY_ASCII         , NULL , 0 , iXForm );
-			else            PlyWritePolygons( Out.value , &mesh , PLY_BINARY_NATIVE , NULL , 0 , iXForm );
 		}
 		else
 		{
-			if( ASCII.set ) PlyWritePolygons( Out.value , &mesh , PLY_ASCII         , &comments[0] , (int)comments.size() , iXForm );
-			else            PlyWritePolygons( Out.value , &mesh , PLY_BINARY_NATIVE , &comments[0] , (int)comments.size() , iXForm );
+			if( Density.set )
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , PointStreamValue< Real > , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ).data = w , std::get< 1 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
+			else
+			{
+				typedef PlyVertexWithData< Real , Dim , MultiPointStreamData< Real , AdditionalPointSampleData > > Vertex;
+				std::function< void ( Vertex& , Point< Real , Dim > , Real , TotalPointSampleData ) > SetVertex = []( Vertex& v , Point< Real , Dim > p , Real w , TotalPointSampleData d ){ v.point = p , std::get< 0 >( v.data.data ) = std::get< 1 >( d.data ); };
+				ExtractMesh< Vertex >( UIntPack< FEMSigs ... >() , std::tuple< SampleData ... >() , tree , solution , isoValue , samples , sampleData , density , SetVertex , comments , iXForm );
+			}
 		}
+		if( sampleData ){ delete sampleData ; sampleData = NULL; }
 	}
 	if( density ) delete density , density = NULL;
-	DumpOutput2( comments , "#          Total Solve: %9.1f (s), %9.1f (MB)\n" , Time()-startTime , tree.maxMemoryUsage() );
-
-	return 1;
-}
-
-#if defined( _WIN32 ) || defined( _WIN64 )
-inline double to_seconds( const FILETIME& ft )
-{
-	const double low_to_sec=100e-9; // 100 nanoseconds
-	const double high_to_sec=low_to_sec*4294967296.0;
-	return ft.dwLowDateTime*low_to_sec+ft.dwHighDateTime*high_to_sec;
+	messageWriter( comments , "#          Total Solve: %9.1f (s), %9.1f (MB)\n" , Time()-startTime , FEMTree< Dim , Real >::MaxMemoryUsage() );
 }
-#endif // _WIN32 || _WIN64
 
 #ifndef FAST_COMPILE
-template< class Real , class Vertex >
-int Execute( int argc , char* argv[] )
+template< unsigned int Dim , class Real , typename ... SampleData >
+void Execute( int argc , char* argv[] ) // Run-time dispatcher: maps BType.value and Degree.value to a compile-time FEM signature pack and forwards to the Execute overload that takes that pack.
 {
-	if( FreeBoundary.set )
+	switch( BType.value ) // Case labels are BoundaryType+1 -- presumably BType.value is stored offset by one from the enum; confirm against the BType parameter definition.
+	{
+	case BOUNDARY_FREE+1:
+	{
 		switch( Degree.value )
 		{
-		case 2: return _Execute< Real , 2 , BOUNDARY_FREE , Vertex >( argc , argv );
-		case 3: return _Execute< Real , 3 , BOUNDARY_FREE , Vertex >( argc , argv );
-		case 4: return _Execute< Real , 4 , BOUNDARY_FREE , Vertex >( argc , argv );
-		default: fprintf( stderr , "[ERROR] Only B-Splines of degree 2 - 4 are supported" ) ; return EXIT_FAILURE;
+			case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_FREE >::Signature >() );
+			case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_FREE >::Signature >() );
+//			case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_FREE >::Signature >() );
+			default: ERROR_OUT( "Only B-Splines of degree 2 - 3 are supported" ); // NOTE(review): no break/return follows; if ERROR_OUT ever returns, control falls into the next boundary case -- presumably it terminates, confirm.
 		}
-	else
+	}
+	case BOUNDARY_NEUMANN+1:
+	{
 		switch( Degree.value )
 		{
-		case 2: return _Execute< Real , 2 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-		case 3: return _Execute< Real , 3 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-		case 4: return _Execute< Real , 4 , BOUNDARY_NEUMANN , Vertex >( argc , argv );
-		default: fprintf( stderr , "[ERROR] Only B-Splines of degree 2 - 4 are supported" ) ; return EXIT_FAILURE;
+			case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_NEUMANN >::Signature >() );
+			case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_NEUMANN >::Signature >() );
+//			case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_NEUMANN >::Signature >() );
+			default: ERROR_OUT( "Only B-Splines of degree 2 - 3 are supported" );
 		}
+	}
+	case BOUNDARY_DIRICHLET+1:
+	{
+		switch( Degree.value )
+		{
+			case 2: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 2 , BOUNDARY_DIRICHLET >::Signature >() );
+			case 3: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 3 , BOUNDARY_DIRICHLET >::Signature >() );
+//			case 4: return Execute< Real , SampleData ... >( argc , argv , IsotropicUIntPack< Dim , FEMDegreeAndBType< 4 , BOUNDARY_DIRICHLET >::Signature >() );
+			default: ERROR_OUT( "Only B-Splines of degree 2 - 3 are supported" );
+		}
+	}
+	default: ERROR_OUT( "Not a valid boundary type: %d" , BType.value ); // BType.value matched none of the three boundary cases above.
+	}
 }
 #endif // !FAST_COMPILE
+// Entry point: parses the command line, validates and rescales the SSD weights, then runs Execute with or without per-point color data; always returns EXIT_SUCCESS once Execute has been called.
 int main( int argc , char* argv[] )
 {
-#if defined(WIN32) && defined(MAX_MEMORY_GB)
-	if( MAX_MEMORY_GB>0 )
+	Timer timer; // Wall/CPU times from this timer are printed at the end when Performance is set.
+#ifdef ARRAY_DEBUG
+	WARN( "Array debugging enabled" );
+#endif // ARRAY_DEBUG
+
+	cmdLineParse( argc-1 , &argv[1] , params ); // Skip argv[0] (the program name).
+	if( MaxMemoryGB.value>0 ) SetPeakMemoryMB( MaxMemoryGB.value<<10 ); // <<10 converts the limit from GB to MB.
+	omp_set_num_threads( Threads.value > 1 ? Threads.value : 1 ); // Never request fewer than one OpenMP thread.
+	messageWriter.echoSTDOUT = Verbose.set; // Presumably makes messageWriter echo to stdout only in verbose mode -- confirm against its definition.
+
+	if( !In.set )
+	{
+		ShowUsage( argv[0] );
+		return 0; // No input given: show usage and leave without running anything.
+	}
+	if( GradientWeight.value<=0 ) ERROR_OUT( "Gradient weight must be positive: %g>0" , GradientWeight.value );
+	if( BiLapWeight.value<=0 ) ERROR_OUT( "Bi-Laplacian weight must be positive: %g>0" , BiLapWeight.value );
+	if( DataX.value<=0 ) Normals.set = Colors.set = false; // A non-positive DataX switches off both the Normals and Colors flags.
+	if( BaseDepth.value>FullDepth.value )
 	{
-		SIZE_T peakMemory = 1;
-		peakMemory <<= 30;
-		peakMemory *= MAX_MEMORY_GB;
-		printf( "Limiting memory usage to %.2f GB\n" , float( peakMemory>>30 ) );
-		HANDLE h = CreateJobObject( NULL , NULL );
-		AssignProcessToJobObject( h , GetCurrentProcess() );
-
-		JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
-		jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_JOB_MEMORY;
-		jeli.JobMemoryLimit = peakMemory;
-		if( !SetInformationJobObject( h , JobObjectExtendedLimitInformation , &jeli , sizeof( jeli ) ) )
-			fprintf( stderr , "Failed to set memory limit\n" );
+		if( BaseDepth.set ) WARN( "Base depth must be smaller than full depth: %d <= %d" , BaseDepth.value , FullDepth.value ); // Warn only when the user set BaseDepth explicitly.
+		BaseDepth.value = FullDepth.value; // Clamp BaseDepth to FullDepth.
 	}
-#endif // defined(WIN32) && defined(MAX_MEMORY_GB)
-	double t = Time();
+	ValueWeight.value    *= (float)BaseSSDWeights[0]; // Scale the three user weights by the matching BaseSSDWeights entries (0: value, 1: gradient, 2: bi-Laplacian).
+	GradientWeight.value *= (float)BaseSSDWeights[1];
+	BiLapWeight.value    *= (float)BaseSSDWeights[2];
+
+#ifdef USE_DOUBLE
+	typedef double Real;
+#else // !USE_DOUBLE
+	typedef float  Real;
+#endif // USE_DOUBLE
 
-	cmdLineParse( argc-1 , &argv[1] , sizeof(params)/sizeof(cmdLineReadable*) , params , 1 );
-	if( GradientWeight.value<=0 ) fprintf( stderr , "[ERROR] Gradient weight must be positive: %g>=0\n" , GradientWeight.value ) , exit( 0 );
-	if( BiLapWeight.value<=0 ) fprintf( stderr , "[ERROR] Bi-Laplacian weight must be positive: %g>=0\n" , BiLapWeight.value ) , exit( 0 );
 #ifdef FAST_COMPILE
-	static const int Degree = 2;
-	static const BoundaryType BType = BOUNDARY_NEUMANN;
-	fprintf( stderr , "[WARNING] Compiling for degree-%d, boundary-%s, single-precision _only_\n" , Degree , BoundaryNames[ BType ] );
-	if( Density.set )
-		if( Color.set && Color.value>0 ) _Execute< float , Degree , BType , PlyColorAndValueVertex< float > >( argc , argv );
-		else                             _Execute< float , Degree , BType , PlyValueVertex< float > >( argc , argv );
-	else
-		if( Color.set && Color.value>0 ) _Execute< float , Degree , BType , PlyColorVertex< float > >( argc , argv );
-		else                             _Execute< float , Degree , BType , PlyVertex< float > >( argc , argv );
+	static const int Degree = DEFAULT_FEM_DEGREE; // FAST_COMPILE fixes degree and boundary type at compile time instead of dispatching on the command-line values.
+	static const BoundaryType BType = DEFAULT_FEM_BOUNDARY;
+	typedef IsotropicUIntPack< DIMENSION , FEMDegreeAndBType< Degree , BType >::Signature > FEMSigs;
+	WARN( "Compiled for degree-%d, boundary-%s, %s-precision _only_" , Degree , BoundaryNames[ BType ] , sizeof(DefaultFloatType)==4 ? "single" : "double" ); // NOTE(review): precision is reported from DefaultFloatType, but Execute below is instantiated with Real -- confirm the two always agree.
+	if( Colors.set ) Execute< Real , PointStreamColor< DefaultFloatType > >( argc , argv , FEMSigs() ); // NOTE(review): color data uses DefaultFloatType here but float in the !FAST_COMPILE branch -- confirm this is intended.
+	else             Execute< Real >( argc , argv , FEMSigs() );
 #else // !FAST_COMPILE
-	if( Density.set )
-		if( Color.set && Color.value>0 )
-			if( Double.set ) Execute< double , PlyColorAndValueVertex< float > >( argc , argv );
-			else             Execute< float  , PlyColorAndValueVertex< float > >( argc , argv );
-		else
-			if( Double.set ) Execute< double , PlyValueVertex< float > >( argc , argv );
-			else             Execute< float  , PlyValueVertex< float > >( argc , argv );
-	else
-		if( Color.set && Color.value>0 )
-			if( Double.set ) Execute< double , PlyColorVertex< float > >( argc , argv );
-			else             Execute< float  , PlyColorVertex< float > >( argc , argv );
-		else
-			if( Double.set ) Execute< double , PlyVertex< float > >( argc , argv );
-			else             Execute< float  , PlyVertex< float > >( argc , argv );
+	if( Colors.set ) Execute< DIMENSION , Real , PointStreamColor< float > >( argc , argv ); // With Colors set, PointStreamColor is passed as the extra per-sample data type.
+	else             Execute< DIMENSION , Real >( argc , argv );
 #endif // FAST_COMPILE
-#if defined( _WIN32 ) || defined( _WIN64 )
 	if( Performance.set )
 	{
-		HANDLE cur_thread=GetCurrentThread();
-		FILETIME tcreat, texit, tkernel, tuser;
-		if( GetThreadTimes( cur_thread , &tcreat , &texit , &tkernel , &tuser ) )
-			printf( "Time (Wall/User/Kernel): %.2f / %.2f / %.2f\n" , Time()-t , to_seconds( tuser ) , to_seconds( tkernel ) );
-		else printf( "Time: %.2f\n" , Time()-t );
-		HANDLE h = GetCurrentProcess();
-		PROCESS_MEMORY_COUNTERS pmc;
-		if( GetProcessMemoryInfo( h , &pmc , sizeof(pmc) ) ) printf( "Peak Memory (MB): %d\n" , (int)(pmc.PeakWorkingSetSize>>20) );
+		printf( "Time (Wall/CPU): %.2f / %.2f\n" , timer.wallTime() , timer.cpuTime() ); // Timing now comes from the Timer created at the top of main, with no platform-specific guard.
+		printf( "Peak Memory (MB): %d\n" , MemoryInfo::PeakMemoryUsageMB() ); // NOTE(review): %d assumes PeakMemoryUsageMB() returns an int -- confirm.
 	}
-#endif // _WIN32 || _WIN64
 	return EXIT_SUCCESS;
 }
diff --git a/Src/SparseMatrix.h b/Src/SparseMatrix.h
index f133157..8c6bbf8 100644
--- a/Src/SparseMatrix.h
+++ b/Src/SparseMatrix.h
@@ -25,170 +25,134 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 DAMAGE.
 */
-
 #ifndef __SPARSEMATRIX_HPP
 #define __SPARSEMATRIX_HPP
 
-#define NEW_SPARSE_MATRIX 1
-#define ZERO_TESTING_JACOBI 1
-
-
+#include "SparseMatrixInterface.h"
 #include "Array.h"
 
-template <class T>
-struct MatrixEntry
-{
-	MatrixEntry( void )		    { N =-1; Value = 0; }
-	MatrixEntry( int i )	    { N = i; Value = 0; }
-	MatrixEntry( int i , T v )	{ N = i; Value = v; }
-	int N;
-	T Value;
-};
+template< class T , class IndexType , size_t MaxRowSize=0 > class SparseMatrix;
 
-template<class T> class SparseMatrix
+template< class T , class IndexType > class SparseMatrix< T , IndexType , 0 > : public SparseMatrixInterface< T , ConstPointer( MatrixEntry< T , IndexType > ) >	// specialization for MaxRowSize==0: every row owns its own, independently sized array of entries
 {
-private:
-	bool _contiguous;
-	int _maxEntriesPerRow;
-	void _init( void );
+	template< class T2 , class IndexType2 , size_t MaxRowSize2 > friend class SparseMatrix;	// lets the converting constructor/assignment read the _entries of other instantiations
+	Pointer( Pointer( MatrixEntry< T , IndexType > ) ) _entries;	// _entries[i] is the array holding the rowSizes[i] entries of row i
 public:
-	int rows;
-	Pointer( int ) rowSizes;
-	Pointer( Pointer( MatrixEntry< T > ) ) m_ppElements;
-	Pointer( MatrixEntry< T > ) operator[] ( int idx ) { return m_ppElements[idx]; }
-	ConstPointer( MatrixEntry< T > ) operator[] ( int idx ) const { return m_ppElements[idx]; }
+	static void Swap( SparseMatrix& M1 , SparseMatrix& M2 )	// exchanges the complete state (row count, row sizes, row storage) of two matrices without copying entries
+	{
+		std::swap( M1.rowNum , M2.rowNum );
+		std::swap( M1.rowSizes , M2.rowSizes );
+		std::swap( M1._entries , M2._entries );
+	}
+	typedef SparseMatrixInterface< T , ConstPointer( MatrixEntry< T , IndexType > ) > Interface;	// the base class, used to reach its members explicitly
+	typedef ConstPointer( MatrixEntry< T , IndexType > ) RowIterator;	// type returned by begin()/end() for walking one row
 
-	SparseMatrix( void );
-	SparseMatrix( int rows );
-	SparseMatrix( int rows , int maxEntriesPerRow );
-	void Resize( int rows );
-	void Resize( int rows , int maxEntriesPerRow );
-	void SetRowSize( int row , int count );
-	int Entries( void ) const;
+	size_t rowNum;	// number of rows
+	Pointer( size_t ) rowSizes;	// rowSizes[i] is the number of entries stored in row i
 
+	SparseMatrix( void );	// creates an empty matrix (zero rows)
 	SparseMatrix( const SparseMatrix& M );
+	SparseMatrix( SparseMatrix&& M );	// takes over the storage of M by swapping with a freshly emptied matrix
+	template< class T2 , class IndexType2 >
+	SparseMatrix( const SparseMatrix< T2 , IndexType2 , 0 >& M );	// copies from a matrix with different value/index types, converting each entry
 	~SparseMatrix();
-
-	void SetZero();
-
-	SparseMatrix<T>& operator = (const SparseMatrix<T>& M);
-
-	SparseMatrix<T> operator * (const T& V) const;
-	SparseMatrix<T>& operator *= (const T& V);
-
-	template< class T2 > void Multiply( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads=1 ) const;
-	template< class T2 > void MultiplyAndAddAverage( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads=1 ) const;
-
-	bool write( FILE* fp ) const;
-	bool write( const char* fileName ) const;
-	bool read( FILE* fp );
-	bool read( const char* fileName );
-
-	template< class T2 > void getDiagonal( Pointer( T2 ) diagonal , int threads=1 ) const;
-	template< class T2 > static int SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads=1 );
-	template< class T2 > static int SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads=1 );
-	template< class T2 > static int SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward );
-	template< class T2 > static int SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward );
-	template< class T2 > static int SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads=1 );
-	template< class T2 > static int SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads=1 );
-	template< class T2 > static int SolveCG( const SparseMatrix<T>& M , ConstPointer( T2 ) b , int iters , Pointer( T2 ) x , T2 eps=1e-8 , int reset=1 , bool addDCTerm=false , bool solveNormal=false , int threads=1 );
+	SparseMatrix& operator = ( SparseMatrix&& M );
+	SparseMatrix< T , IndexType >& operator = ( const SparseMatrix< T , IndexType >& M );
+	template< class T2 , class IndexType2 >
+	SparseMatrix< T , IndexType , 0 >& operator = ( const SparseMatrix< T2 , IndexType2 , 0 >& M );
+
+	template< class T2 > void operator()( const T2* in , T2* out ) const;	// forwards to Interface::multiply( in , out )
+
+	template< class T2 , class IndexType2 >
+	SparseMatrix< T , IndexType , 0 >& copy( const SparseMatrix< T2 , IndexType2 , 0 >& M );	// replaces the contents with a converted copy of M
+
+	inline ConstPointer( MatrixEntry< T , IndexType > ) begin( size_t row ) const { return _entries[row]; }	// first entry of the row
+	inline ConstPointer( MatrixEntry< T , IndexType > ) end  ( size_t row ) const { return _entries[row] + (unsigned long long)rowSizes[row]; }	// one past the last entry of the row
+	inline size_t rows                              ( void )       const { return rowNum; }
+	inline size_t rowSize                           ( size_t idx ) const { return rowSizes[idx]; }
+
+	SparseMatrix( size_t rowNum );	// creates a matrix with rowNum empty rows
+	void resize	( size_t rowNum );	// discards all current rows and leaves rowNum empty rows
+	void setRowSize( size_t row , size_t count );	// frees the row and allocates count zero-filled entries (previous entries are lost)
+	void resetRowSize( size_t row , size_t count );	// re-allocates the row to count entries via ReAllocPointer and zero-fills any entries added past the old size
+	inline      Pointer( MatrixEntry< T , IndexType > ) operator[] ( size_t idx )       { return _entries[idx]; }	// mutable access to the entries of row idx
+	inline ConstPointer( MatrixEntry< T , IndexType > ) operator[] ( size_t idx ) const { return _entries[idx]; }	// read-only access to the entries of row idx
+	
+	// With copy move, these should be well-behaved from a memory perspective
+	static SparseMatrix Identity( size_t dim );	// dim rows, each holding the single entry ( i , 1 )
+	SparseMatrix transpose(                  T (*TransposeFunction)( const T& )=NULL ) const;
+	SparseMatrix transpose( size_t outRows , T (*TransposeFunction)( const T& )=NULL ) const;
+	SparseMatrix  operator *  ( T s ) const;
+	SparseMatrix  operator /  ( T s ) const;
+	SparseMatrix  operator *  ( const SparseMatrix& M ) const;
+	SparseMatrix  operator +  ( const SparseMatrix& M ) const;
+	SparseMatrix  operator -  ( const SparseMatrix& M ) const;
+	SparseMatrix& operator *= ( T s );
+	SparseMatrix& operator /= ( T s );
+	SparseMatrix& operator *= ( const SparseMatrix& M );
+	SparseMatrix& operator += ( const SparseMatrix& M );
+	SparseMatrix& operator -= ( const SparseMatrix& M );
+
+	Pointer( T ) operator * ( const Pointer( T ) in ) const;	// matrix-vector product returned in a newly allocated array of rowNum values
+
+	template< class A_const_iterator , class B_const_iterator >
+	static SparseMatrix Multiply( const SparseMatrixInterface< T , A_const_iterator >& A , const SparseMatrixInterface< T , B_const_iterator >& B );
+	template< class const_iterator >
+	static SparseMatrix Transpose( const SparseMatrixInterface< T , const_iterator >& At , T (*TransposeFunction)( const T& )=NULL );
+	template< class const_iterator >
+	static SparseMatrix Transpose( const SparseMatrixInterface< T , const_iterator >& At , size_t outRows , T (*TransposeFunction)( const T& )=NULL );
 };
 
-
-#if !NEW_SPARSE_MATRIX
-template< class T2 >
-struct MapReduceVector
+template< class T , class IndexType , size_t MaxRowSize > class SparseMatrix : public SparseMatrixInterface< T , ConstPointer( MatrixEntry< T , IndexType > ) >	// general case: all rows live in one array, row i starting at offset MaxRowSize*i
 {
-private:
-	int _dim;
+	template< class T2 , class IndexType2 , size_t MaxRowSize2 > friend class SparseMatrix;	// befriend the other instantiations of this template so the converting constructor/assignment can read their private storage
+	Pointer( MatrixEntry< T , IndexType > ) _entries;	// flat storage; row i occupies [ MaxRowSize*i , MaxRowSize*i + _rowSizes[i] )
+	size_t _rowNum;	// number of rows
+	Pointer( size_t ) _rowSizes;	// _rowSizes[i] is the number of entries used in row i
+	size_t _maxRows;	// presumably the row capacity of the allocated storage -- TODO confirm against resize()
 public:
-	std::vector< T2* > out;
-	MapReduceVector( void ) { _dim = 0; }
-	~MapReduceVector( void )
-	{
-		if( _dim ) for( int t=0 ; t<int(out.size()) ; t++ ) delete[] out[t];
-		out.resize( 0 );
-	}
-	T2* operator[]( int t ) { return out[t]; }
-	const T2* operator[]( int t ) const { return out[t]; }
-	int threads( void ) const { return int( out.size() ); }
-	void resize( int threads , int dim )
+	static void Swap( SparseMatrix& M1 , SparseMatrix& M2 )	// exchanges the complete state of two matrices without copying entries
 	{
-		if( threads!=out.size() || _dim<dim )
-		{
-			for( int t=0 ; t<int(out.size()) ; t++ ) delete[] out[t];
-			out.resize( threads );
-			for( int t=0 ; t<int(out.size()) ; t++ ) out[t] = new T2[dim];
-			_dim = dim;
-		}
+		std::swap( M1._rowNum , M2._rowNum ) , std::swap( M1._maxRows , M2._maxRows );	// _maxRows has to travel with the buffers it describes, otherwise both objects end up with inconsistent state
+		std::swap( M1._rowSizes , M2._rowSizes );
+		std::swap( M1._entries , M2._entries );
 	}
+	typedef SparseMatrixInterface< T , ConstPointer( MatrixEntry< T , IndexType > ) > Interface;	// the base class, used to reach its members explicitly
+	typedef ConstPointer( MatrixEntry< T , IndexType > ) RowIterator;	// type returned by begin()/end() for walking one row
 
+	SparseMatrix( void );
+	SparseMatrix( const SparseMatrix& M );
+	SparseMatrix( SparseMatrix&& M );
+	template< class T2 , class IndexType2 >
+	SparseMatrix( const SparseMatrix< T2 , IndexType2 , MaxRowSize >& M );	// copy from a matrix with different value/index types but the same MaxRowSize
+	SparseMatrix& operator = ( SparseMatrix&& M );
+	SparseMatrix< T , IndexType , MaxRowSize >& operator = ( const SparseMatrix< T , IndexType , MaxRowSize >& M );
+	template< class T2 , class IndexType2 >
+	SparseMatrix< T , IndexType , MaxRowSize >& operator = ( const SparseMatrix< T2 , IndexType2 , MaxRowSize >& M );
+	~SparseMatrix( void );
+
+	template< class T2 > void operator()( const T2* in , T2* out ) const;
+
+	inline ConstPointer( MatrixEntry< T , IndexType > ) begin( size_t row ) const { return _entries + MaxRowSize * row; }	// first entry of the row
+	inline ConstPointer( MatrixEntry< T , IndexType > ) end  ( size_t row ) const { return _entries + MaxRowSize * row + (unsigned long long)_rowSizes[row]; }	// one past the last used entry of the row
+	inline size_t rows                              ( void )       const { return _rowNum; }
+	inline size_t rowSize                           ( size_t idx ) const { return _rowSizes[idx]; }
+
+	SparseMatrix( size_t rowNum );
+	void resize	( size_t rowNum );
+	void setRowSize( size_t row , size_t rowSize );
+	void resetRowSize( size_t row , size_t rowSize );
+	inline      Pointer( MatrixEntry< T , IndexType > ) operator[] ( size_t idx )       { return _entries + MaxRowSize * idx; }	// mutable access to the entries of row idx
+	inline ConstPointer( MatrixEntry< T , IndexType > ) operator[] ( size_t idx ) const { return _entries + MaxRowSize * idx; }	// read-only access to the entries of row idx
+
+	// With copy move, these should be well-behaved from a memory perspective
+	SparseMatrix  operator *  ( T s ) const;
+	SparseMatrix  operator /  ( T s ) const;
+	SparseMatrix& operator *= ( T s );
+	SparseMatrix& operator /= ( T s );
+
+	Pointer( T ) operator * ( const Pointer( T ) in ) const;
 };
 
-template< class T >
-class SparseSymmetricMatrix : public SparseMatrix< T >
-{
-public:
-
-	template< class T2 >
-	Vector< T2 > operator * ( const Vector<T2>& V ) const;
-
-	template< class T2 >
-	Vector< T2 > Multiply( const Vector<T2>& V ) const;
-
-	template< class T2 >
-	void Multiply( const Vector<T2>& In, Vector<T2>& Out , bool addDCTerm=false ) const;
-
-	template< class T2 >
-	void Multiply( const Vector<T2>& In, Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm=false ) const;
-
-	template< class T2 >
-	void Multiply( const Vector<T2>& In, Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const;
-
-	template< class T2 >
-	static int SolveCG( const SparseSymmetricMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& x ,                                T2 eps=1e-8 , int reset=1 , int threads=0  , bool addDCTerm=false , bool solveNormal=false );
-
-	template< class T2 >
-	static int SolveCG( const SparseSymmetricMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector<T2>& scratch , T2 eps=1e-8 , int reset=1 ,                  bool addDCTerm=false , bool solveNormal=false );
-#ifdef WIN32
-	template< class T2 >
-	static int SolveCGAtomic( const SparseSymmetricMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps=1e-8 , int reset=1 , int threads=0  , bool solveNormal=false );
-#endif // WIN32
-	template< class T2 >
-	static int SolveJacobi( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b ,             Vector<T2>& x , MapReduceVector<T2>& scratch , Vector<T2>& Mx , T2 sor        , int reset );
-	template< class T2 >
-	static int SolveJacobi( const SparseSymmetricMatrix<T>& M ,                              const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector<T2>& scratch ,                  T2 sor=T2(1.) , int reset=1 );
-	template< class T2 >
-	static int SolveJacobi( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b ,             Vector<T2>& x ,                                Vector<T2>& Mx , T2 sor        , int reset );
-	template< class T2 >
-	static int SolveJacobi( const SparseSymmetricMatrix<T>& M ,                              const Vector<T2>& b , int iters , Vector<T2>& x ,                                                 T2 sor=T2(1.) , int reset=1 );
-
-	enum
-	{
-		ORDERING_UPPER_TRIANGULAR ,
-		ORDERING_LOWER_TRIANGULAR ,
-		ORDERING_NONE
-	};
-	template< class T2 >
-	static int SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b ,             Vector<T2>& x , MapReduceVector<T2>& scratch , Vector<T2>& Mx , Vector<T2>& dx , bool forward , int reset   );
-	template< class T2 >
-	static int SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseSymmetricMatrix<T>& M ,                              const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector<T2>& scratch ,                                   bool forward , int reset=1 );
-
-	template< class T2 >
-	static int SolveGS( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b ,             Vector<T2>& x , MapReduceVector<T2>& scratch , Vector<T2>& Mx , Vector<T2>& dx , bool forward , int reset   , int ordering );
-	template< class T2 >
-	static int SolveGS( const SparseSymmetricMatrix<T>& M ,                              const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector<T2>& scratch ,                                   bool forward , int reset=1 , int ordering=ORDERING_NONE );
-	template< class T2 >
-	static int SolveGS( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b ,             Vector<T2>& x ,                                Vector<T2>& Mx , Vector<T2>& dx , bool forward , int reset   , int ordering );
-	template< class T2 >
-	static int SolveGS( const SparseSymmetricMatrix<T>& M ,                              const Vector<T2>& b , int iters , Vector<T2>& x ,                                                                  bool forward , int reset=1 , int ordering=ORDERING_NONE );
-
-	template< class T2 >
-	void getDiagonal( Vector< T2 >& diagonal , int threads=1 ) const;
-};
-#endif // !NEW_SPARSE_MATRIX
-
 #include "SparseMatrix.inl"
-
-#endif
-
+#endif /* __SPARSEMATRIX_HPP */
diff --git a/Src/SparseMatrix.inl b/Src/SparseMatrix.inl
index 763590a..d7a6d1c 100644
--- a/Src/SparseMatrix.inl
+++ b/Src/SparseMatrix.inl
@@ -1,4 +1,4 @@
-/*
+/* -*- C++ -*-
 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
 All rights reserved.
 
@@ -27,478 +27,642 @@ DAMAGE.
 */
 
 #include <float.h>
-#include <string.h>
+#include <complex>
+#include <unordered_map>
 
-
-///////////////////
-//  SparseMatrix //
-///////////////////
-///////////////////////////////////////
-// SparseMatrix Methods and Memebers //
-///////////////////////////////////////
-
-template< class T >
-void SparseMatrix< T >::_init( void )
+///////////////////////////////////////////////////////////////
+//  SparseMatrix (unconstrained max row size specialization) //
+///////////////////////////////////////////////////////////////
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >::SparseMatrix( void )
 {
-	_contiguous = false;
-	_maxEntriesPerRow = 0;
-	rows = 0;
-	rowSizes = NullPointer( int );
-	m_ppElements = NullPointer( Pointer( MatrixEntry< T > ) );
+	rowNum = 0;	// an empty matrix: no rows ...
+	_entries = NullPointer( Pointer( MatrixEntry< T , IndexType > ) );	// ... no row storage ...
+	rowSizes = NullPointer( size_t );	// ... and no per-row sizes
 }
 
-template< class T > SparseMatrix< T >::SparseMatrix( void ){  _init(); }
-
-template< class T > SparseMatrix< T >::SparseMatrix( int rows                        ){ _init() , Resize( rows ); }
-template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ){ _init() , Resize( rows , maxEntriesPerRow ); }
-
-template< class T >
-SparseMatrix< T >::SparseMatrix( const SparseMatrix& M )
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >::SparseMatrix( size_t rowNum )
+{
+	this->_entries = NullPointer( Pointer( MatrixEntry< T , IndexType > ) );	// start from the empty state so that resize() has nothing to free
+	this->rowSizes = NullPointer( size_t );
+	this->rowNum = 0;	// "this->" is required here: the parameter hides the member of the same name
+	this->resize( rowNum );	// allocate the requested number of (empty) rows
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >::SparseMatrix( const SparseMatrix& M )	// deep copy: every row of M is duplicated into freshly allocated storage
 {
-	_init();
-	if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
-	else                Resize( M.rows );
-	for( int i=0 ; i<rows ; i++ )
+	rowSizes = NullPointer( size_t );	// start from the empty state so that resize() has nothing to free
+	rowNum = 0;
+	_entries = NullPointer( Pointer( MatrixEntry< T , IndexType > ) );
+	resize( M.rowNum );
+	for( size_t i=0 ; i<rowNum ; i++ )	// size_t index: rowNum is a size_t, an int index would overflow for more than INT_MAX rows
 	{
-		SetRowSize( i , M.rowSizes[i] );
-		memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
+		setRowSize( i , M.rowSizes[i] );
+		for( size_t j=0 ; j<rowSizes[i] ; j++ ) _entries[i][j] = M._entries[i][j];
 	}
 }
-template<class T>
-int SparseMatrix<T>::Entries( void ) const
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >::SparseMatrix( SparseMatrix&& M )
 {
-	int e = 0;
-	for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
-	return e;
+	_entries = NullPointer( Pointer( MatrixEntry< T , IndexType > ) );	// put this object into the empty state first ...
+	rowNum = 0;
+	rowSizes = NullPointer( size_t );
+
+	Swap( *this , M );	// ... then trade states: this object receives M's storage and M is left empty
 }
-template<class T>
-SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M)
+template< class T , class IndexType >
+template< class T2 , class IndexType2 >
+SparseMatrix< T , IndexType , 0 >::SparseMatrix( const SparseMatrix< T2 , IndexType2 , 0 >& M )	// converting copy: every entry of M is rebuilt with this matrix's index and value types
 {
-	if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
-	else                Resize( M.rows );
-	for( int i=0 ; i<rows ; i++ )
+	rowSizes = NullPointer( size_t );	// start from the empty state so that resize() has nothing to free
+	rowNum = 0;
+	_entries = NullPointer( Pointer( MatrixEntry< T , IndexType > ) );	// use the NullPointer macro like every other constructor, rather than a bare NULL
+	resize( M.rowNum );
+	for( size_t i=0 ; i<rowNum ; i++ )	// size_t index: rowNum is a size_t, an int index would overflow for more than INT_MAX rows
 	{
-		SetRowSize( i , M.rowSizes[i] );
-		memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
+		setRowSize( i , M.rowSizes[i] );
+		for( size_t j=0 ; j<rowSizes[i] ; j++ ) _entries[i][j] = MatrixEntry< T , IndexType >( M._entries[i][j].N , T( M._entries[i][j].Value ) );
 	}
-	return *this;
 }
 
-template<class T>
-SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
-
-template< class T >
-bool SparseMatrix< T >::write( const char* fileName ) const
-{
-	FILE* fp = fopen( fileName , "wb" );
-	if( !fp ) return false;
-	bool ret = write( fp );
-	fclose( fp );
-	return ret;
-}
-template< class T >
-bool SparseMatrix< T >::read( const char* fileName )
+template< class T , class IndexType >
+template< class T2 , class IndexType2 >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::copy( const SparseMatrix< T2 , IndexType2 , 0 >& M  )	// replaces the contents of this matrix with a type-converted copy of M and returns *this
 {
-	FILE* fp = fopen( fileName , "rb" );
-	if( !fp ) return false;
-	bool ret = read( fp );
-	fclose( fp );
-	return ret;
+	resize( M.rowNum );	// drops the current contents and creates M.rowNum empty rows
+	for ( size_t i=0 ; i<rowNum ; i++)	// size_t index: rowNum is a size_t, an int index would overflow for more than INT_MAX rows
+	{
+		setRowSize( i , M.rowSizes[i] );
+		for( size_t j=0 ; j<rowSizes[i] ; j++ )
+		{
+			IndexType idx = (IndexType)M._entries[i][j].N;	// convert straight to the destination index type; going through int would truncate wide column indices
+			_entries[i][j] = MatrixEntry< T , IndexType >( idx , T( M[i][j].Value ) );
+		}
+	}
+	return *this;
 }
-template< class T >
-bool SparseMatrix< T >::write( FILE* fp ) const
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator = ( SparseMatrix< T , IndexType , 0 >&& M )
 {
-	if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
-	if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
-	for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
-	return true;
+	Swap( *this , M );	// move by exchanging state: M ends up holding this matrix's previous storage, which M's destructor releases
+	return *this;
 }
-template< class T >
-bool SparseMatrix< T >::read( FILE* fp )
+
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator = ( const SparseMatrix< T , IndexType , 0 >& M )	// deep-copy assignment; returns *this
 {
-	int r;
-	if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
-	Resize( r );
-	if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
-	for( int i=0 ; i<rows ; i++ )
+	if( this==&M ) return *this; else resize( M.rowNum );	// self-assignment guard: resize() frees every row, so without it "A = A" would wipe the matrix before anything could be copied
+	for( size_t i=0 ; i<rowNum ; i++ )
 	{
-		r = rowSizes[i];
-		rowSizes[i] = 0;
-		SetRowSize( i , r );
-		if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
+		setRowSize( i , M.rowSizes[i] );
+		for( size_t j=0 ; j<rowSizes[i] ; j++ ) _entries[i][j]=M._entries[i][j];
 	}
-	return true;
+	return *this;
 }
-
-
-template< class T >
-void SparseMatrix< T >::Resize( int r )
+template< class T , class IndexType >
+template< class T2 , class IndexType2 >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator = (const SparseMatrix< T2 , IndexType2 , 0 >& M)	// converting assignment: replaces the contents with a type-converted copy of M; returns *this
 {
-	if( rows>0 )
-	{
-		if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
-		else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
-		FreePointer( m_ppElements );
-		FreePointer( rowSizes );
-	}
-	rows = r;
-	if( r )
+	resize( M.rowNum );	// drops the current contents and creates M.rowNum empty rows
+	for( size_t i=0 ; i<rowNum ; i++ )	// size_t index: rowNum is a size_t, an int index would overflow for more than INT_MAX rows
 	{
-		rowSizes = AllocPointer< int >( r );
-		m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
-		memset( rowSizes , 0 , sizeof( int ) * r );
+		setRowSize( i , M.rowSizes[i] );
+		for( size_t j=0 ; j<rowSizes[i] ; j++ ) _entries[i][j] = MatrixEntry< T , IndexType >( M._entries[i][j].N , T( M._entries[i][j].Value ) );
 	}
-	_contiguous = false;
-	_maxEntriesPerRow = 0;
+	return *this;
 }
-template< class T >
-void SparseMatrix< T >::Resize( int r , int e )
+
+template< class T , class IndexType >
+template< class T2 >
+void SparseMatrix< T , IndexType , 0 >::operator() ( const T2* in , T2* out ) const { Interface::multiply( in , out ); }	// function-call syntax for the base interface's multiply( in , out )
+
+template< class T , class IndexType > SparseMatrix< T , IndexType , 0 >::~SparseMatrix( void ) { resize( 0 ); }	// resize( 0 ) frees every row, the row table and the row-size array
+
+template< class T , class IndexType >
+void SparseMatrix< T , IndexType , 0 >::resize( size_t r )	// discards all current rows and leaves the matrix with r empty rows (nothing is preserved)
 {
-	if( rows>0 )
+	if( rowNum>0 )
 	{
-		if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
-		else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
-		FreePointer( m_ppElements );
+		for( size_t i=0 ; i<rowNum ; i++ ) FreePointer( _entries[i] );	// release every row, then the row table itself; size_t index matches the type of rowNum
+		FreePointer( _entries );
 		FreePointer( rowSizes );
 	}
-	rows = r;
+	rowNum = r;
 	if( r )
 	{
-		rowSizes = AllocPointer< int >( r );
-		m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
-		m_ppElements[0] = AllocPointer< MatrixEntry< T > >( r * e );
-		memset( rowSizes , 0 , sizeof( int ) * r );
-		for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
+		rowSizes = AllocPointer< size_t >( r ) , memset( rowSizes , 0 , sizeof(size_t)*r );	// every new row starts with zero entries
+		_entries = AllocPointer< Pointer( MatrixEntry< T , IndexType > ) >( r );
+		for( size_t i=0 ; i<r ; i++ ) _entries[i] = NullPointer( MatrixEntry< T , IndexType > );	// rows get storage only once setRowSize()/resetRowSize() is called
 	}
-	_contiguous = true;
-	_maxEntriesPerRow = e;
 }
 
-template<class T>
-void SparseMatrix< T >::SetRowSize( int row , int count )
+template< class T , class IndexType >
+void SparseMatrix< T , IndexType , 0 >::setRowSize( size_t row , size_t count )	// gives the row exactly count zero-filled entries; whatever the row held before is freed
 {
-	if( _contiguous )
+	if( row<rowNum )	// row is unsigned, so the upper bound is the only check that can fail
 	{
-		if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
+		FreePointer( _entries[row] );
+		if( count!=0 )
+		{
+			_entries[row] = AllocPointer< MatrixEntry< T , IndexType > >( count );
+			memset( _entries[row] , 0 , count * sizeof( MatrixEntry< T , IndexType > ) );
+		}
 		rowSizes[row] = count;
 	}
-	else if( row>=0 && row<rows )
+	else ERROR_OUT( "Row is out of bounds: 0 <= %d < %d" , (int)row , (int)rowNum );
+}
+template< class T , class IndexType >
+void SparseMatrix< T , IndexType , 0 >::resetRowSize( size_t row , size_t count )	// re-allocates the row to count entries through ReAllocPointer and zero-fills any entries added past the old size
+{
+	if( row<rowNum )	// row is unsigned, so the upper bound is the only check that can fail
 	{
-		if( rowSizes[row] ) FreePointer( m_ppElements[row] );
-		if( count>0 ) m_ppElements[row] = AllocPointer< MatrixEntry< T > >( count );
-		// [WARNING] Why wasn't this line here before???
+		const size_t previous = rowSizes[row];
+		_entries[row] = ReAllocPointer< MatrixEntry< T, IndexType > >( _entries[row] , count );
+		if( previous<count ) memset( _entries[row]+previous , 0 , ( count - previous ) * sizeof( MatrixEntry< T , IndexType > ) );
 		rowSizes[row] = count;
 	}
+	else ERROR_OUT( "Row is out of bounds: 0 <= %d < %d" , (int)row , (int)rowNum );
 }
 
-
-template<class T>
-void SparseMatrix<T>::SetZero()
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::Identity( size_t dim )	// returns a matrix with dim rows, row i holding the single entry ( column i , value 1 )
 {
-	Resize(this->m_N, this->m_M);
+	SparseMatrix I;
+	I.resize( dim );
+	for( size_t i=0 ; i<dim ; i++ ) I.setRowSize( i , 1 ) , I[i][0] = MatrixEntry< T , IndexType >( (IndexType)i , (T)1 );	// size_t index: dim is a size_t, an int index would overflow for dim > INT_MAX
+	return I;
 }
-
-template<class T>
-SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator *= ( T s )	// multiplies every stored value by s in place; returns *this
 {
-	SparseMatrix<T> M(*this);
-	M *= V;
-	return M;
+#pragma omp parallel for
+	for( int i=0 ; i<rowNum ; i++ ) for( int j=0 ; j<rowSizes[i] ; j++ ) _entries[i][j].Value *= s;	// each iteration touches only its own row, so rows are scaled in parallel; NOTE(review): the signed index is presumably kept for OpenMP 2.0 compilers -- confirm before changing it
+	return *this;
 }
-
-template<class T>
-SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V)
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator /= ( T s ){ return (*this) *= ( (T)1./s ); }	// divide in place by scaling with the reciprocal; must use *= (not *), since * yields a temporary that cannot be returned by reference
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator *= ( const SparseMatrix< T , IndexType , 0 >& B )	// replaces this matrix with the product (*this) * B; returns *this
 {
-	for( int i=0 ; i<rows ; i++ ) for( int ii=0 ; ii<rowSizes[i] ; i++ ) m_ppElements[i][ii].Value *= V;
+	SparseMatrix foo = (*this) * B;	// the product is fully computed before *this is touched, so B may alias *this
+	Swap( *this , foo );	// adopt the result's storage instead of deep-copying it; foo leaves scope holding (and freeing) the old contents
 	return *this;
 }
-
-template< class T >
-template< class T2 >
-void SparseMatrix< T >::Multiply( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator += ( const SparseMatrix< T , IndexType , 0 >& B )	// replaces this matrix with the sum (*this) + B; returns *this
+{
+	SparseMatrix foo = (*this) + B;	// the sum is fully computed before *this is touched, so B may alias *this
+	Swap( *this , foo );	// adopt the result's storage instead of deep-copying it; foo leaves scope holding (and freeing) the old contents
+	return *this;
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 >& SparseMatrix< T , IndexType , 0 >::operator -= ( const SparseMatrix< T , IndexType , 0 >& B )	// replaces this matrix with the difference (*this) - B; returns *this
 {
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<rows ; i++ )
+	SparseMatrix foo = (*this) - B;	// the difference is fully computed before *this is touched, so B may alias *this
+	Swap( *this , foo );	// adopt the result's storage instead of deep-copying it; foo leaves scope holding (and freeing) the old contents
+	return *this;
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::operator * ( T s ) const	// returns a copy of this matrix with every value multiplied by s
+{
+	SparseMatrix out = (*this); out *= s;	// one deep copy, scaled in place
+	return out;	// return the named local (not the reference produced by *=) so it can be moved/elided instead of deep-copied a second time
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::operator / ( T s ) const { return (*this) * ( (T)1. / s ); }	// division implemented as multiplication by the reciprocal of s
+template< class T , class IndexType >
+Pointer( T ) SparseMatrix< T , IndexType , 0 >::operator * ( const Pointer( T ) in ) const	// matrix-vector product; the result is returned in a newly allocated array
+{
+	Pointer( T ) out = AllocPointer< T >( rowNum );	// one output value per row; presumably the caller is responsible for freeing it -- TODO confirm
+	MultiplyParallel( in , out , omp_get_num_procs() , 0 );	// NOTE(review): MultiplyParallel is not declared in this class -- confirm it exists in SparseMatrixInterface and is reachable unqualified (operator() calls Interface::multiply instead)
+	return out;
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::operator * ( const SparseMatrix< T , IndexType , 0 >& B ) const	// sparse matrix product A * B with A = *this; errors out if A references a column for which B has no row
+{
+	SparseMatrix out;
+	const SparseMatrix& A = *this;
+	size_t aCols = 0 , aRows = A.rowNum;
+	size_t bCols = 0 , bRows = B.rowNum;
+	for( size_t i=0 ; i<A.rowNum ; i++ ) for( size_t j=0 ; j<A.rowSizes[i] ; j++ ) if( aCols<=A[i][j].N ) aCols = A[i][j].N+1;	// column count = largest referenced column index + 1
+	for( size_t i=0 ; i<B.rowNum ; i++ ) for( size_t j=0 ; j<B.rowSizes[i] ; j++ ) if( bCols<=B[i][j].N ) bCols = B[i][j].N+1;
+	if( bRows<aCols ) ERROR_OUT( "Matrix sizes do not support multiplication %lld x %lld * %lld x %lld" , (unsigned long long)aRows , (unsigned long long)aCols , (unsigned long long)bRows , (unsigned long long)bCols );
+
+	out.resize( aRows );	// resize() takes a size_t; a narrowing (int) cast would corrupt row counts above INT_MAX
+#pragma omp parallel for
+	for( int i=0 ; i<aRows ; i++ )
 	{
-		T2 _out(0);
-		ConstPointer( MatrixEntry< T > ) start = m_ppElements[i];
-		ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i];
-		ConstPointer( MatrixEntry< T > ) e;
-		for( e=start ; e!=end ; e++ ) _out += in[ e->N ] * e->Value;
-		out[i] = _out;
+		std::unordered_map< IndexType , T > row;	// output row i, accumulated as column index -> value
+		for( int j=0 ; j<A.rowSizes[i] ; j++ )
+		{
+			IndexType idx1 = A[i][j].N;
+			T AValue = A[i][j].Value;
+			for( int k=0 ; k<B.rowSizes[idx1] ; k++ )
+			{
+				IndexType idx2 = B[idx1][k].N;
+				T BValue = B[idx1][k].Value;
+				typename std::unordered_map< IndexType , T >::iterator iter = row.find(idx2);
+				if( iter==row.end() ) row[idx2] = AValue * BValue;
+				else iter->second += AValue * BValue;
+			}
+		}
+		out.setRowSize( i , row.size() );	// setRowSize() takes a size_t, so no narrowing cast (matches operator +)
+		out.rowSizes[i] = 0;	// reset the size and use it as the write cursor while the map is flushed into the row
+		for( typename std::unordered_map< IndexType , T >::iterator iter=row.begin() ; iter!=row.end() ; iter++ ) out[i][ out.rowSizes[i]++ ] = MatrixEntry< T , IndexType >( iter->first , iter->second );
 	}
+	return out;
 }
-template< class T >
-template< class T2 >
-void SparseMatrix< T >::MultiplyAndAddAverage( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const
-{
-#if 1
-	int count = 0;
-	T2 average = 0;
-#pragma omp parallel for num_threads( threads ) reduction( + : average , count )
-	for( int i=0 ; i<rows ; i++ ) if( rowSizes[i] ) average += in[i] , count++;
-	average /= count;
-	Multiply( in , out , threads );
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<rows ; i++ ) if( rowSizes[i] )  out[i] += average;
-#else
-	T2 average = 0;
-	for( int i=0 ; i<rows ; i++ ) average += in[i];
-	average /= rows;
-	Multiply( in , out , threads );
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<rows ; i++ ) out[i] += average;
-#endif
-}
-
-
-template< class T >
-template< class T2 >
-int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads )
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::operator + ( const SparseMatrix< T , IndexType , 0 >& B ) const
 {
-	M.Multiply( x , Mx , threads );
-#if ZERO_TESTING_JACOBI
-	for( int j=0 ; j<int(M.rows) ; j++ ) if( diagonal[j] ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j];
-#else // !ZERO_TESTING_JACOBI
-	for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j];
-#endif // ZERO_TESTING_JACOBI
-	return M.rows;
+	typedef std::unordered_map< IndexType , T > RowMap;	// column index -> accumulated value for one output row
+	const SparseMatrix& A = *this;
+	SparseMatrix sum;
+	size_t sumRows = std::max< size_t >( A.rowNum , B.rowNum );	// the operand with fewer rows simply contributes nothing to the extra rows
+	sum.resize( sumRows );
+#pragma omp parallel for
+	for( int r=0 ; r<sumRows ; r++ )
+	{
+		RowMap acc;
+		if( r<A.rowNum )
+			for( int c=0 ; c<A.rowSizes[r] ; c++ )
+			{
+				IndexType col = A[r][c].N;
+				typename RowMap::iterator hit = acc.find( col );
+				if( hit!=acc.end() ) hit->second += A[r][c].Value;
+				else acc[col] = A[r][c].Value;
+			}
+		if( r<B.rowNum )
+			for( int c=0 ; c<B.rowSizes[r] ; c++ )
+			{
+				IndexType col = B[r][c].N;
+				typename RowMap::iterator hit = acc.find( col );
+				if( hit!=acc.end() ) hit->second += B[r][c].Value;
+				else acc[col] = B[r][c].Value;
+			}
+		sum.setRowSize( r , acc.size() );
+		sum.rowSizes[r] = 0;	// reset the size and use it as the write cursor while the map is flushed into the row
+		for( typename RowMap::iterator e=acc.begin() ; e!=acc.end() ; e++ ) sum[r][ sum.rowSizes[r]++ ] = MatrixEntry< T , IndexType >( e->first , e->second );
+	}
+	return sum;
 }
-template< class T >
-template< class T2 >
-int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads )
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::operator - ( const SparseMatrix< T , IndexType , 0 >& B ) const
 {
-	M.Multiply( x , Mx , threads );
-#if ZERO_TESTING_JACOBI
-	for( int j=0 ; j<int(M.rows) ; j++ )
+	const SparseMatrix& A = *this; // computes A - B row by row; the result has max( A.rowNum , B.rowNum ) rows
+	const size_t outRows = std::max< size_t >( A.rowNum , B.rowNum );
+	SparseMatrix diff;
+
+	diff.resize( outRows );
+#pragma omp parallel for
+	for( int r=0 ; r<outRows ; r++ )
 	{
-		T diagonal = M[j][0].Value;
-		if( diagonal ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal;
-	}
-#else // !ZERO_TESTING_JACOBI
-	for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / M[j][0].Value;
-#endif // ZERO_TESTING_JACOBI
-	return M.rows;
-}
-template<class T>
-template<class T2>
-int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward )
-{
-#define ITERATE                                                         \
-	{                                                                   \
-		ConstPointer( MatrixEntry< T > ) start = M[j];                  \
-		ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j];   \
-		ConstPointer( MatrixEntry< T > ) e;                             \
-		T2 _b = b[j];                                                   \
-		for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;       \
-		x[j] += _b / diagonal[j];                                       \
+		std::unordered_map< IndexType , T > merged; // column index -> accumulated value for row r
+		if( r<A.rowNum )
+			for( int k=0 ; k<A.rowSizes[r] ; k++ )
+			{
+				IndexType col = A[r][k].N;
+				auto hit = merged.find(col);
+				if( hit!=merged.end() ) hit->second += A[r][k].Value;
+				else merged[col] = A[r][k].Value;
+			}
+		if( r<B.rowNum )
+			for( int k=0 ; k<B.rowSizes[r] ; k++ )
+			{
+				IndexType col = B[r][k].N;
+				auto hit = merged.find(col);
+				if( hit!=merged.end() ) hit->second -= B[r][k].Value;
+				else merged[col] = -B[r][k].Value; // column present only in B contributes its negation
+			}
+		diff.setRowSize( r , (int)merged.size() );
+		diff.rowSizes[r] = 0; // reused as the write cursor while the merged row is copied out
+		for( auto entry=merged.begin() ; entry!=merged.end() ; ++entry ) diff[r][ diff.rowSizes[r]++ ] = MatrixEntry< T , IndexType >( entry->first , entry->second );
 	}
+	return diff;
+}
+
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::transpose( T (*TransposeFunction)( const T& ) ) const
+{ // Returns the transpose of this matrix; when TransposeFunction is non-null it is applied to every copied value.
+	SparseMatrix A;
+	const SparseMatrix& At = *this;
+	size_t aRows = 0 , aCols = At.rowNum; // aCols is not read again in this function
+	for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ ) if( aRows<=At[i][j].N ) aRows = At[i][j].N+1; // output row count = 1 + largest column index present
 
-#if ZERO_TESTING_JACOBI
-	if( forward ) for( int j=0 ; j<int(M.rows)    ; j++ ){ if( diagonal[j] ){ ITERATE; } }
-	else          for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ if( diagonal[j] ){ ITERATE; } }
-#else // !ZERO_TESTING_JACOBI
-	if( forward ) for( int j=0 ; j<int(M.rows) ; j++ ){ ITERATE; }
-	else          for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ ITERATE; }
-#endif // ZERO_TESTING_JACOBI
-#undef ITERATE
-	return M.rows;
-}
-template<class T>
-template<class T2>
-int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads )
-{
-	int sum=0;
-#ifdef _WIN32
-#define SetOMPParallel __pragma( omp parallel for num_threads( threads ) )
-#else // !_WIN32
-#define SetOMPParallel _Pragma( "omp parallel for num_threads( threads )" )
-#endif // _WIN32
-#if ZERO_TESTING_JACOBI
-#define ITERATE( indices )                                                        \
-	{                                                                             \
-SetOMPParallel                                                                    \
-		for( int k=0 ; k<int( indices.size() ) ; k++ ) if( diagonal[indices[k]] ) \
-		{                                                                         \
-			int jj = indices[k];                                                  \
-			ConstPointer( MatrixEntry< T > ) start = M[jj];                       \
-			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];        \
-			ConstPointer( MatrixEntry< T > ) e;                                   \
-			T2 _b = b[jj];                                                        \
-			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;             \
-			x[jj] += _b / diagonal[jj];                                           \
-		}                                                                         \
+	A.resize( aRows );
+	for( int i=0 ; i<aRows ; i++ ) A.rowSizes[i] = 0; // clear the counters before tallying
+#pragma omp parallel for
+	for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ )
+#pragma omp atomic
+		A.rowSizes[ At[i][j].N ]++; // tally entries per output row; atomic because different input rows can target the same output row
+#pragma omp parallel for
+	for( int i=0 ; i<A.rowNum ; i++ )
+	{
+		size_t t = A.rowSizes[i]; // the tally computed above
+		A.rowSizes[i] = 0; // NOTE(review): cleared before setRowSize, presumably so the tally is not mistaken for an existing allocation — confirm against setRowSize
+		A.setRowSize( i , t );
+		A.rowSizes[i] = 0; // from here on rowSizes[i] is the write cursor used by the fill loops below
 	}
-#else // !ZERO_TESTING_JACOBI
-#define ITERATE( indices )                                                  \
-	{                                                                       \
-SetOMPParallel                                                              \
-		for( int k=0 ; k<int( indices.size() ) ; k++ )                      \
-		{                                                                   \
-			int jj = indices[k];                                            \
-			ConstPointer( MatrixEntry< T > ) start = M[jj];                 \
-			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];  \
-			ConstPointer( MatrixEntry< T > ) e;                             \
-			T2 _b = b[jj];                                                  \
-			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;       \
-			x[jj] += _b / diagonal[jj];                                     \
-		}                                                                   \
+	if( TransposeFunction ) for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ ) // this fill loop carries no OpenMP pragma
+	{
+		int ii = At[i][j].N; // the source column index becomes the destination row
+		A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( i , TransposeFunction( At[i][j].Value ) );
 	}
-#endif // ZERO_TESTING_JACOBI
-	if( forward ) for( int j=0 ; j<mcIndices.size()  ; j++ ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); }
-	else for( int j=int( mcIndices.size() )-1 ; j>=0 ; j-- ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); }
-#undef ITERATE
-#undef SetOMPParallel
-	return sum;
-}
-template<class T>
-template<class T2>
-int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward )
-{
-	int start = forward ? 0 : M.rows-1 , end = forward ? M.rows : -1 , dir = forward ? 1 : -1;
-	for( int j=start ; j!=end ; j+=dir )
+	else for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ ) // same fill, values copied unchanged
+	{
+		int ii = At[i][j].N;
+		A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( i , At[i][j].Value );
+	}
+	return A;
+}
+template< class T , class IndexType >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::transpose( size_t aRows , T (*TransposeFunction)( const T& ) ) const
+{ // Returns the transpose with a caller-prescribed row count aRows; reports an error if some column index does not fit. A non-null TransposeFunction is applied to every copied value.
+	SparseMatrix A;
+	const SparseMatrix& At = *this;
+	size_t _aRows = 0; // minimum row count the transpose needs
+	for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ ) if( _aRows<=At[i][j].N ) _aRows = At[i][j].N+1; // running maximum: 1 + largest column index (must compare against _aRows, not the row count of At)
+	if( _aRows>aRows ) ERROR_OUT( "Prescribed output dimension too low: %d < %zu" , (int)aRows , _aRows );
+
+	A.resize( aRows );
+	for( int i=0 ; i<aRows ; i++ ) A.rowSizes[i] = 0; // clear the counters before tallying
+#pragma omp parallel for
+	for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ )
+#pragma omp atomic
+		A.rowSizes[ At[i][j].N ]++; // tally entries per output row; in bounds only because of the check above
+#pragma omp parallel for
+	for( int i=0 ; i<A.rowNum ; i++ )
 	{
-		T diagonal = M[j][0].Value;
-#if ZERO_TESTING_JACOBI
-		if( diagonal )
-#endif // ZERO_TESTING_JACOBI
+		size_t t = A.rowSizes[i]; // the tally computed above
+		A.rowSizes[i] = 0;
+		A.setRowSize( i , t );
+		A.rowSizes[i] = 0; // from here on rowSizes[i] is the write cursor used by the fill loops below
+	}
+	if( TransposeFunction )
+		for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ )
 		{
-			ConstPointer( MatrixEntry< T > ) start = M[j];
-			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j];
-			ConstPointer( MatrixEntry< T > ) e;
-			start++;
-			T2 _b = b[j];
-			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;
-			x[j] = _b / diagonal;
+			int ii = At[i][j].N; // the source column index becomes the destination row
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( i , TransposeFunction( At[i][j].Value ) );
 		}
-	}
-	return M.rows;
+	else
+		for( int i=0 ; i<At.rowNum ; i++ ) for( int j=0 ; j<At.rowSizes[i] ; j++ )
+		{
+			int ii = At[i][j].N;
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( i , At[i][j].Value );
+		}
+	return A;
 }
-template<class T>
-template<class T2>
-int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads )
+
+template< class T , class IndexType >
+template< class A_const_iterator , class B_const_iterator >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::Multiply( const SparseMatrixInterface< T , A_const_iterator >& A , const SparseMatrixInterface< T , B_const_iterator >& B )
 {
-	int sum=0 , start = forward ? 0 : int( mcIndices.size() )-1 , end = forward ? int( mcIndices.size() ) : -1 , dir = forward ? 1 : -1;
-	for( int j=start ; j!=end ; j+=dir )
+	SparseMatrix M; // result of the sparse product A * B
+	size_t aCols = 0 , aRows = A.rows();
+	size_t bCols = 0 , bRows = B.rows();
+	for( int i=0 ; i<A.rows() ; i++ ) for( A_const_iterator iter=A.begin(i) ; iter!=A.end(i) ; iter++ ) if( aCols<=iter->N ) aCols = iter->N+1; // column counts are inferred as 1 + largest column index
+	for( int i=0 ; i<B.rows() ; i++ ) for( B_const_iterator iter=B.begin(i) ; iter!=B.end(i) ; iter++ ) if( bCols<=iter->N ) bCols = iter->N+1;
+	if( bRows<aCols ) ERROR_OUT( "Matrix sizes do not support multiplication %llu x %llu * %llu x %llu" , (unsigned long long)aRows , (unsigned long long)aCols , (unsigned long long)bRows , (unsigned long long)bCols ); // %llu matches the unsigned long long arguments
+
+	M.resize( (int)aRows );
+#pragma omp parallel for
+	for( int i=0 ; i<aRows ; i++ )
 	{
-		const std::vector< int >& _mcIndices = mcIndices[j];
-		sum += int( _mcIndices.size() );
+		std::unordered_map< IndexType , T > row; // column index -> accumulated value of output row i
+		for( A_const_iterator iterA=A.begin(i) ; iterA!=A.end(i) ; iterA++ )
 		{
-#pragma omp parallel for num_threads( threads )
-			for( int k=0 ; k<int( _mcIndices.size() ) ; k++ )
+			IndexType idx1 = iterA->N;
+			T AValue = iterA->Value;
+			for( B_const_iterator iterB=B.begin(idx1) ; iterB!=B.end(idx1) ; iterB++ ) // walk row idx1 of B, i.e. the row selected by A's column
 			{
-				int jj = _mcIndices[k];
-				T diagonal = M[jj][0].Value;
-#if ZERO_TESTING_JACOBI
-				if( diagonal )
-#endif // ZERO_TESTING_JACOBI
-				{
-					ConstPointer( MatrixEntry< T > ) start = M[jj];
-					ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];
-					ConstPointer( MatrixEntry< T > ) e;
-					start++;
-					T2 _b = b[jj];
-					for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;
-					x[jj] = _b / diagonal;
-				}                                   
+				IndexType idx2 = iterB->N;
+				T BValue = iterB->Value;
+				T temp = BValue * AValue; // temp = A( i , idx1 ) * B( idx1 , idx2 )
+				typename std::unordered_map< IndexType , T >::iterator iter = row.find(idx2);
+				if( iter==row.end() ) row[idx2] = temp;
+				else iter->second += temp;
 			}
 		}
+		M.setRowSize( i , (int)row.size() );
+		M.rowSizes[i] = 0; // reused as the write cursor while the accumulated row is copied out
+		for( typename std::unordered_map< IndexType , T >::iterator iter=row.begin() ; iter!=row.end() ; iter++ )
+			M[i][ M.rowSizes[i]++ ] = MatrixEntry< T , IndexType >( iter->first , iter->second );
 	}
-	return sum;
+	return M;
 }
-
-template< class T >
-template< class T2 >
-void SparseMatrix< T >::getDiagonal( Pointer( T2 ) diagonal , int threads ) const
+template< class T , class IndexType >
+template< class const_iterator >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::Transpose( const SparseMatrixInterface< T , const_iterator >& At , T (*TransposeFunction)( const T& ) )
 {
-#pragma omp parallel for num_threads( threads )
-	for( int i=0 ; i<rows ; i++ )
+	SparseMatrix< T , IndexType , 0 > A; // result: the transpose of At; a non-null TransposeFunction is applied to every copied value
+	size_t aRows = 0 , aCols = At.rows();
+	for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ ) if( aRows<=iter->N ) aRows = iter->N+1; // output row count = 1 + largest column index in At
+
+	A.resize( aRows );
+	for( size_t i=0 ; i<aRows ; i++ ) A.rowSizes[i] = 0;
+	for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ ) A.rowSizes[ iter->N ]++; // tally entries per output row
+	for( size_t i=0 ; i<A.rowNum ; i++ ) // rowNum is the row-count data member; `rows` names the interface accessor function and cannot be compared directly
 	{
-		T2 d = 0.;
-		ConstPointer( MatrixEntry< T > ) start = m_ppElements[i];
-		ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i];
-		ConstPointer( MatrixEntry< T > ) e;
-		for( e=start ; e!=end ; e++ ) if( e->N==i ) d += e->Value;
-		diagonal[i] = d;
+		size_t t = A.rowSizes[i]; // the tally computed above
+		A.rowSizes[i] = 0;
+		A.setRowSize( i , t );
+		A.rowSizes[i] = 0; // from here on rowSizes[i] is the write cursor used by the fill loops below
 	}
+	if( TransposeFunction )
+		for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ )
+		{
+			size_t ii = (size_t)iter->N; // the source column index becomes the destination row
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( (IndexType)i , TransposeFunction( iter->Value ) );
+		}
+	else
+		for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ )
+		{
+			size_t ii = (size_t)iter->N;
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( (IndexType)i , iter->Value );
+		}
+	return A;
 }
-template< class T >
-template< class T2 >
-int SparseMatrix< T >::SolveCG( const SparseMatrix<T>& A , ConstPointer( T2 ) b , int iters , Pointer( T2 ) x , T2 eps , int reset , bool addDCTerm , bool solveNormal , int threads )
-{
-	eps *= eps;
-	int dim = A.rows;
-	Pointer( T2 ) r = AllocPointer< T2 >( dim );
-	Pointer( T2 ) d = AllocPointer< T2 >( dim );
-	Pointer( T2 ) q = AllocPointer< T2 >( dim );
-	Pointer( T2 ) temp = NullPointer( T2 );
-	if( reset ) memset( x , 0 , sizeof(T2)* dim );
-	if( solveNormal ) temp = AllocPointer< T2 >( dim );
-
-	double delta_new = 0 , delta_0;
-	if( solveNormal )
+template< class T , class IndexType >
+template< class const_iterator >
+SparseMatrix< T , IndexType , 0 > SparseMatrix< T , IndexType , 0 >::Transpose( const SparseMatrixInterface< T , const_iterator >& At , size_t outRows , T (*TransposeFunction)( const T& ) )
+{ // Returns the transpose of At with a caller-prescribed row count outRows; reports an error if some column index of At does not fit.
+	SparseMatrix< T , IndexType , 0 > A;
+	size_t _aRows = 0 , aRows = outRows; // _aRows: minimum row count the transpose needs
+	for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ ) if( _aRows<=iter->N ) _aRows = iter->N+1; // running maximum: 1 + largest column index (must compare against _aRows, not the row count of At)
+	if( _aRows>aRows ) ERROR_OUT( "Prescribed output dimension too low: %d < %zu" , (int)aRows , _aRows ); // cast keeps the argument in step with %d
+
+	A.resize( aRows );
+	for( size_t i=0 ; i<aRows ; i++ ) A.rowSizes[i] = 0;
+	for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ ) A.rowSizes[ iter->N ]++; // tally entries per output row; in bounds only because of the check above
+	for( size_t i=0 ; i<A.rowNum ; i++ ) // rowNum is the row-count data member; `rows` names the interface accessor function
 	{
-		if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )b , temp , threads );
-		else            A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads ) , A.Multiply( ( ConstPointer( T2 ) )b , temp , threads );
-#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
-		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = temp[i] - r[i] , delta_new += r[i] * r[i];
+		size_t t = A.rowSizes[i]; // the tally computed above
+		A.rowSizes[i] = 0;
+		A.setRowSize( i , t );
+		A.rowSizes[i] = 0; // from here on rowSizes[i] is the write cursor used by the fill loops below
 	}
+	if( TransposeFunction )
+		for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ )
+		{
+			size_t ii = (size_t)iter->N; // the source column index becomes the destination row
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( (IndexType)i , TransposeFunction( iter->Value ) );
+		}
 	else
+		for( size_t i=0 ; i<At.rows() ; i++ ) for( const_iterator iter=At.begin(i) ; iter!=At.end(i) ; iter++ )
+		{
+			size_t ii = (size_t)iter->N;
+			A[ii][ A.rowSizes[ii]++ ] = MatrixEntry< T , IndexType >( (IndexType)i , iter->Value );
+		}
+	return A; // the assembled transpose (previously `true`, which converted to a bogus matrix)
+}
+
+///////////////////////////////////////////
+//  SparseMatrix (bounded max row size ) //
+///////////////////////////////////////////
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >::SparseMatrix( void )
+{ // Empty matrix: zero rows, zero capacity, no storage attached.
+	_rowNum = 0;
+	_maxRows = 0;
+	_rowSizes = NullPointer( size_t );
+	_entries = NullPointer( MatrixEntry< T , IndexType > );
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >::SparseMatrix( size_t rowNum ) : SparseMatrix()
+{ // Starts from the empty state set by the default constructor, then sizes the matrix to rowNum rows.
+	resize( rowNum );
+}
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix()
+{ // Copy constructor: sizes this matrix like M, then copies each row's size and its used entries.
+	resize( M._rowNum );
+	for( int i=0 ; i<_rowNum ; i++ )
+	{
+		_rowSizes[i] = M._rowSizes[i];
+		for( int j=0 ; j<_rowSizes[i] ; j++ ) _entries[ i + MaxRowSize*j ] = M._entries[ i + MaxRowSize*j ]; // reads M._entries, the same storage member copy-assignment uses. NOTE(review): i + MaxRowSize*j can exceed the rowNum*MaxRowSize allocation when _rowNum < MaxRowSize — confirm the intended layout
+	}
+}
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >::SparseMatrix( SparseMatrix&& M ) : SparseMatrix()
+{ // Move constructor: begins in the default (empty) state and hands both objects to Swap (defined outside this chunk; presumably exchanges their contents — TODO confirm).
+	Swap( *this , M );
+}
+template< class T , class IndexType , size_t MaxRowSize >
+template< class T2 , class IndexType2 >
+SparseMatrix< T , IndexType , MaxRowSize >::SparseMatrix( const SparseMatrix< T2 , IndexType2 , MaxRowSize >& M ) : SparseMatrix()
+{ // Converting copy constructor: same shape as M, with every value converted to T.
+	resize( M._rowNum );
+	for( int i=0 ; i<_rowNum ; i++ )
 	{
-		if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads );
-		else            A.Multiply( ( ConstPointer( T2 ) )x , r , threads );
-#pragma omp parallel for num_threads( threads )  reduction ( + : delta_new )
-		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = b[i] - r[i] , delta_new += r[i] * r[i];
+		_rowSizes[i] = M._rowSizes[i];
+		for( int j=0 ; j<_rowSizes[i] ; j++ ) _entries[ i + MaxRowSize*j ] = MatrixEntry< T , IndexType >( M._entries[ i + MaxRowSize*j ].N , T( M._entries[ i + MaxRowSize*j ].Value ) ); // index and value both come from the same M._entries slot, as in the converting operator=
 	}
-	delta_0 = delta_new;
-	if( delta_new<eps )
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >& SparseMatrix< T , IndexType , MaxRowSize >::operator = ( SparseMatrix< T , IndexType , MaxRowSize >&& M )
+{ // Move assignment: hands both objects to Swap (defined outside this chunk; presumably M ends up holding this matrix's previous contents — TODO confirm).
+	Swap( *this , M );
+	return *this;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >& SparseMatrix< T , IndexType , MaxRowSize >::operator = ( const SparseMatrix< T , IndexType , MaxRowSize >& M )
+{ // Copy assignment: sizes this matrix like M, then copies each row's size and its used entries.
+	resize( M._rowNum ); // reallocates only when M has more rows than the current capacity
+	for( int i=0 ; i<_rowNum ; i++ )
 	{
-//		fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
-		FreePointer( r );
-		FreePointer( d );
-		FreePointer( q );
-		FreePointer( temp );
-		return 0;
+		_rowSizes[i] = M._rowSizes[i];
+		for( int j=0 ; j<_rowSizes[i] ; j++ ) _entries[ i + MaxRowSize*j ] = M._entries[ i + MaxRowSize*j ]; // NOTE(review): i + MaxRowSize*j can exceed the rowNum*MaxRowSize allocation when _rowNum < MaxRowSize — confirm the intended layout
 	}
-	int ii;
-	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
+	return *this;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+template< class T2 , class IndexType2 >
+SparseMatrix< T , IndexType , MaxRowSize >& SparseMatrix< T , IndexType , MaxRowSize >::operator = ( const SparseMatrix< T2 , IndexType2 , MaxRowSize >& M )
+{ // Converting assignment: same shape as M, with every value converted to T.
+	resize( M._rowNum ); // reallocates only when M has more rows than the current capacity
+	for( int i=0 ; i<_rowNum ; i++ )
 	{
-		if( solveNormal )
-			if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , q , threads );
-			else            A.Multiply( ( ConstPointer( T2 ) )d , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , q , threads );
-		else
-			if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , q , threads );
-			else            A.Multiply( ( ConstPointer( T2 ) )d , q , threads );
-        double dDotQ = 0;
-#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
-		for( int i=0 ; i<dim ; i++ ) dDotQ += d[i] * q[i];
-		T2 alpha = T2( delta_new / dDotQ );
-		double delta_old = delta_new;
-		delta_new = 0;
-		if( (ii%50)==(50-1) )
+		_rowSizes[i] = M._rowSizes[i];
+		for( int j=0 ; j<_rowSizes[i] ; j++ ) _entries[ i + MaxRowSize*j ] = MatrixEntry< T , IndexType >( M._entries[ i + MaxRowSize*j ].N , T( M._entries[ i + MaxRowSize*j ].Value ) ); // the index is passed through; the value is explicitly converted from T2 to T
+	}
+	return *this;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+template< class T2 >
+void SparseMatrix< T , IndexType , MaxRowSize >::operator() ( const T2* in , T2* out ) const { Interface::multiply( in , out ); } // function-call form: forwards to Interface::multiply( in , out )
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >::~SparseMatrix( void )
+{ // Releases the two buffers this matrix allocates in resize().
+	FreePointer( _rowSizes );
+	FreePointer( _entries );
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+void SparseMatrix< T , IndexType , MaxRowSize >::resize( size_t rowNum )
+{ // Sets the row count; storage is reallocated only when rowNum exceeds the current capacity (_maxRows).
+	_rowNum = rowNum;
+	if( rowNum>_maxRows ) // a request within capacity keeps the existing buffers and their contents
+	{
+		FreePointer( _rowSizes ); // the old buffers are released, not copied: growing discards prior contents
+		FreePointer( _entries );
+
+		if( rowNum )
 		{
-#pragma omp parallel for num_threads( threads )
-			for( int i=0 ; i<dim ; i++ ) x[i] += d[i] * alpha;
-			if( solveNormal )
-				if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads );
-				else            A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads );
-			else
-				if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads );
-				else            A.Multiply( ( ConstPointer( T2 ) )x , r , threads );
-#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
-			for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i] , delta_new += r[i] * r[i] , x[i] += d[i] * alpha;
+			_rowSizes = AllocPointer< size_t >( rowNum ) , memset( _rowSizes , 0 , sizeof(size_t)*rowNum ); // every row starts with size 0
+			_entries = AllocPointer< MatrixEntry< T , IndexType > >( rowNum * MaxRowSize ); // MaxRowSize entry slots per row
+			_maxRows = rowNum;
 		}
-		else
-#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
-			for( int i=0 ; i<dim ; i++ ) r[i] -= q[i] * alpha , delta_new += r[i] * r[i] ,  x[i] += d[i] * alpha;
-
-		T2 beta = T2( delta_new / delta_old );
-#pragma omp parallel for num_threads( threads )
-		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + d[i] * beta;
 	}
-	FreePointer( r );
-	FreePointer( d );
-	FreePointer( q );
-	FreePointer( temp );
-	return ii;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+void SparseMatrix< T , IndexType , MaxRowSize >::setRowSize( size_t row , size_t rowSize )
+{ // Records how many entries of the given row are in use; nothing is allocated here.
+	if( row>=_rowNum ) ERROR_OUT( "Row is out of bounds: 0 <= %d < %d" , (int)row , (int)_rowNum ); // the row check runs first, so it wins when both arguments are invalid
+	else if( rowSize>MaxRowSize ) ERROR_OUT( "Row size larger than max row size: %d < %d" , (int)rowSize , (int)MaxRowSize );
+	else _rowSizes[row] = rowSize;
+}
+template< class T , class IndexType , size_t MaxRowSize >
+void SparseMatrix< T , IndexType , MaxRowSize >::resetRowSize( size_t row , size_t rowSize )
+{ // Performs the same checks and the same assignment as setRowSize for this bounded-row matrix.
+	if( row>=_rowNum ) ERROR_OUT( "Row is out of bounds: 0 <= %d < %d" , (int)row , (int)_rowNum );
+	else if( rowSize>MaxRowSize ) ERROR_OUT( "Row size larger than max row size: %d < %d" , (int)rowSize , (int)MaxRowSize );
+	else _rowSizes[row] = rowSize;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >& SparseMatrix< T , IndexType , MaxRowSize >::operator *= ( T s )
+{ // Scales the matrix in place by s and returns *this.
+#pragma omp parallel for
+	for( int i=0 ; i<_rowNum*MaxRowSize ; i++ ) _entries[i].Value *= s; // walks all _rowNum*MaxRowSize slots, including slots beyond each row's recorded size
+	return *this;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize >& SparseMatrix< T , IndexType , MaxRowSize >::operator /= ( T s ){ return (*this) *= ( (T)1./s ); } // in-place division: scales *this by 1/s via operator*= (operator* would yield a temporary and leave *this unchanged)
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize > SparseMatrix< T , IndexType , MaxRowSize >::operator * ( T s ) const
+{ // Returns a scaled copy; this matrix is left untouched.
+	SparseMatrix out = (*this); // copy first, then scale the copy in place
+	return out *= s;
+}
+
+template< class T , class IndexType , size_t MaxRowSize >
+SparseMatrix< T , IndexType , MaxRowSize > SparseMatrix< T , IndexType , MaxRowSize >::operator / ( T s ) const { return (*this) * ( (T)1. / s ); } // returns a copy scaled by the reciprocal of s
+
+template< class T , class IndexType , size_t MaxRowSize >
+Pointer( T ) SparseMatrix< T , IndexType , MaxRowSize >::operator * ( const Pointer( T ) in ) const
+{ // Matrix-vector product returned in a newly allocated buffer.
+	Pointer( T ) out = AllocPointer< T >( _rowNum ); // one output value per row; the buffer is handed to the caller (presumably to be released with FreePointer — TODO confirm)
+	MultiplyParallel( in , out , omp_get_num_procs() , 0 ); // NOTE(review): MultiplyParallel is defined outside this chunk; the trailing 0 is presumably the multiply flag — confirm
+	return out;
 }
diff --git a/Src/SparseMatrixInterface.h b/Src/SparseMatrixInterface.h
new file mode 100644
index 0000000..f24b29f
--- /dev/null
+++ b/Src/SparseMatrixInterface.h
@@ -0,0 +1,64 @@
+#ifndef SPARSE_MATRIX_INTERFACE_INCLUDED
+#define SPARSE_MATRIX_INTERFACE_INCLUDED
+
+#define FORCE_TWO_BYTE_ALIGNMENT 1
+#include "Array.h"
+
+#if FORCE_TWO_BYTE_ALIGNMENT
+#pragma pack(push)
+#pragma pack(2)
+#endif // FORCE_TWO_BYTE_ALIGNMENT
+template< class T , class IndexType=int >
+struct MatrixEntry // one stored coefficient of a sparse row: a column index N paired with its value
+{
+	MatrixEntry( void ){ N = -1; Value = 0; }              // index -1 and a zero value
+	MatrixEntry( IndexType i ){ N = i; Value = 0; }        // zero value at index i
+	MatrixEntry( IndexType n , T v ){ N = n; Value = v; }  // fully specified entry
+	IndexType N;
+	T Value;
+};
+#if FORCE_TWO_BYTE_ALIGNMENT
+#pragma pack(pop)
+#endif // FORCE_TWO_BYTE_ALIGNMENT
+
+
+enum // NOTE(review): presumably the values accepted by the multiplyFlag parameter of multiply()/multiplyScaled() — confirm against their definitions
+{
+	MULTIPLY_ADD = 1 , // presumably: accumulate into the output instead of overwriting it — TODO confirm
+	MULTIPLY_NEGATE = 2 // presumably: negate the product — TODO confirm; 1 and 2 occupy distinct bits
+};
+
+template< class T , class const_iterator > class SparseMatrixInterface // abstract row-wise view of a sparse matrix; implementers supply the four pure virtuals below
+{
+public:
+	virtual const_iterator begin( size_t row ) const = 0; // iterator to the first stored entry of a row
+	virtual const_iterator end  ( size_t row ) const = 0; // iterator one past the last stored entry of a row
+	virtual size_t rows   ( void )             const = 0; // number of rows
+	virtual size_t rowSize( size_t idx )       const = 0; // number of stored entries in row idx
+
+	size_t entries( void ) const; // sum of rowSize() over all rows
+
+	double squareNorm( void ) const; // sum of Value*Value over all stored entries
+	double squareASymmetricNorm( void ) const; // sum over stored entries (i,j) of ( M(i,j) - M(j,i) )^2
+	double squareASymmetricNorm( int& idx1 , int& idx2 ) const; // same sum; idx1/idx2 receive the (row,column) of the largest term
+
+	template< class T2 > void multiply      (           ConstPointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag=0 ) const;
+	template< class T2 > void multiplyScaled( T scale , ConstPointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag=0 ) const;
+	template< class T2 > void multiply      (                Pointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag=0 ) const { multiply      (         ( ConstPointer(T2) )( In ) , Out , multiplyFlag ); } // mutable-pointer overload: forwards to the ConstPointer version
+	template< class T2 > void multiplyScaled( T scale ,      Pointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag=0 ) const { multiplyScaled( scale , ( ConstPointer(T2) )( In ) , Out , multiplyFlag ); } // mutable-pointer overload: forwards to the ConstPointer version
+
+	void setDiagonal( Pointer( T ) diagonal ) const;
+	void setDiagonalR( Pointer( T ) diagonal ) const; // NOTE(review): presumably stores reciprocals of the diagonal — confirm against the definition
+	template< class T2 > void jacobiIteration( ConstPointer( T ) diagonal , ConstPointer( T2 ) b , ConstPointer( T2 ) in , Pointer( T2 ) out , bool dReciprocal ) const;
+	template< class T2 > void gsIteration(                                                              ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , bool dReciprocal ) const;
+	template< class T2 > void gsIteration( const              std::vector< int >  & multiColorIndices , ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x ,                bool dReciprocal ) const;
+	template< class T2 > void gsIteration( const std::vector< std::vector< int > >& multiColorIndices , ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , bool dReciprocal ) const;
+};
+
+// Assuming that the SPDFunctor class defines:
+//		auto SPDFunctor::operator()( ConstPointer( T ) , Pointer( T ) ) const
+template< class SPDFunctor , class T , typename Real , class TDotTFunctor > int SolveCG( const SPDFunctor& M , int dim , ConstPointer( T ) b , int iters , Pointer( T ) x , double eps , TDotTFunctor Dot ); // NOTE: Real appears in no function parameter, so callers must name it explicitly
+template< class SPDFunctor , class Preconditioner , class T , typename Real , class TDotTFunctor > int SolveCG( const SPDFunctor& M , const Preconditioner& P , int dim , ConstPointer( T ) b , int iters , Pointer( T ) x , double eps , TDotTFunctor Dot ); // overload taking an additional Preconditioner P
+
+#include "SparseMatrixInterface.inl"
+#endif // SPARSE_MATRIX_INTERFACE_INCLUDED
diff --git a/Src/SparseMatrixInterface.inl b/Src/SparseMatrixInterface.inl
new file mode 100644
index 0000000..d135d4f
--- /dev/null
+++ b/Src/SparseMatrixInterface.inl
@@ -0,0 +1,363 @@
+
+template< class T , class const_iterator > size_t SparseMatrixInterface< T , const_iterator >::entries( void ) const
+{	// Returns the total number of stored entries, i.e. the sum of rowSize() over all rows
+	size_t total = 0;
+	for( size_t row=0 ; row<rows() ; ++row ) total = total + rowSize( row );
+	return total;
+}
+template< class T , class const_iterator > double SparseMatrixInterface< T , const_iterator >::squareNorm( void ) const
+{
+	// Accumulates the sum of the squared values of every stored entry
+	double sumOfSquares = 0;
+	for( size_t row=0 ; row<rows() ; row++ )
+	{
+		const_iterator rowEnd = end( row ) , it = begin( row );
+		while( it!=rowEnd ){ sumOfSquares += it->Value * it->Value ; it++; }
+	}
+	return sumOfSquares;
+}
+template< class T , class const_iterator > double SparseMatrixInterface< T , const_iterator >::squareASymmetricNorm( void ) const
+{	// For every stored entry (i,j): sums the values stored at (j,i) and accumulates the squared difference
+	double sqDiff = 0;
+	for( size_t i=0 ; i<rows() ; i++ )
+	{
+		const_iterator rowEnd = end( i );
+		for( const_iterator entry = begin( i ) ; entry!=rowEnd ; entry++ )
+		{
+			int j = entry->N;
+			const_iterator tRowEnd = end( j );
+			double transposed = 0;	// total of the values found at position (j,i)
+			for( const_iterator tEntry = begin( j ) ; tEntry!=tRowEnd ; tEntry++ )
+			{
+				int col = tEntry->N;
+				if( col==i ) transposed += tEntry->Value;
+			}
+			sqDiff += (entry->Value-transposed) * (entry->Value-transposed);
+		}
+	}
+	return sqDiff;
+}
+template< class T , class const_iterator > double SparseMatrixInterface< T , const_iterator >::squareASymmetricNorm( int& idx1 , int& idx2 ) const
+{	// Returns the sum over stored entries (i,j) of the squared difference to the values at (j,i); (idx1,idx2) receives the entry with the largest such difference
+	double n=0;
+	double max=0;
+	for( size_t i=0 ; i<rows() ; i++ )
+	{
+		const_iterator e = end( i );
+		for( const_iterator iter = begin( i ) ; iter!=e ; iter++ )
+		{
+			int j = iter->N;
+			const_iterator e = end( j );	// end of row j (shadows the outer row-i end inside this scope)
+			double value = 0;	// sum of the values stored at position (j,i)
+			for( const_iterator iter2 = begin( j ) ; iter2!=e ; iter2++ )
+			{
+				int k = iter2->N;
+				if( k==i ) value += iter2->Value;
+			}
+			double temp = (iter->Value-value) * (iter->Value-value);
+			n += temp;
+			if( temp>=max ) idx1 = i , idx2 = j , max=temp;	// >= : ties go to the later entry; idx1/idx2 stay untouched if the matrix stores no entries
+		}
+	}
+	return n;
+}
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::multiply( ConstPointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag ) const
+{	// Matrix-vector product: for each row i, the sum of In[ N ] * Value over the row's entries is combined into Out[i] as selected by multiplyFlag
+	ConstPointer( T2 ) in = In;
+#pragma omp parallel for
+	for( int i=0 ; i<rows() ; i++ )
+	{
+		T2 temp;
+		memset( &temp , 0 , sizeof(T2) );	// the accumulator is zeroed byte-wise
+		ConstPointer( T2 ) _in = in;
+		const_iterator e = end( i );
+		for( const_iterator iter = begin( i ) ; iter!=e ; iter++ ) temp += (T2)( _in[ iter->N ] * iter->Value );
+		if( multiplyFlag & MULTIPLY_NEGATE ) temp = -temp;	// negate the row result when the flag is set
+		if( multiplyFlag & MULTIPLY_ADD ) Out[i] += temp;	// accumulate into the existing output ...
+		else                              Out[i]  = temp;	// ... or overwrite it
+	}
+}
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::multiplyScaled( T scale , ConstPointer( T2 ) In , Pointer( T2 ) Out , int multiplyFlag ) const
+{	// Same row-by-row product as multiply(), with each row result multiplied by scale before the flags are applied
+	ConstPointer( T2 ) in = In;
+#pragma omp parallel for
+	for( int i=0 ; i<rows() ; i++ )
+	{
+		T2 temp;
+		memset( &temp , 0 , sizeof(T2) );	// the accumulator is zeroed byte-wise
+		ConstPointer( T2 ) _in = in;
+		const_iterator e = end( i );
+		for( const_iterator iter = begin( i ) ; iter!=e ; iter++ ) temp += _in[ iter->N ] * iter->Value;
+		temp *= scale;	// scaling happens before the optional negation / accumulation
+		if( multiplyFlag & MULTIPLY_NEGATE ) temp = -temp;
+		if( multiplyFlag & MULTIPLY_ADD ) Out[i] += temp;
+		else                              Out[i]  = temp;
+	}
+}
+
+template< class T , class const_iterator >
+void SparseMatrixInterface< T , const_iterator >::setDiagonal( Pointer( T ) diagonal ) const
+{	// Writes into diagonal[i] the sum of the row-i entries whose column index equals i (zero if there is none)
+#pragma omp parallel for
+	for( int i=0 ; i<rows() ; i++ )
+	{
+		diagonal[i] = (T)0;
+		const_iterator e = end( i );
+		for( const_iterator iter = begin( i ) ; iter!=e ; iter++ ) if( iter->N==i ) diagonal[i] += iter->Value;	// += so that repeated (i,i) entries are summed
+	}
+}
+
+template< class T , class const_iterator >
+void SparseMatrixInterface< T , const_iterator >::setDiagonalR( Pointer( T ) diagonal ) const
+{	// Like setDiagonal(), but stores the reciprocal of each non-zero diagonal value
+#pragma omp parallel for
+	for( int i=0 ; i<rows() ; i++ )
+	{
+		diagonal[i] = (T)0;
+		const_iterator e = end( i );
+		for( const_iterator iter = begin( i ) ; iter!=e ; iter++ ) if( iter->N==i ) diagonal[i] += iter->Value;
+		if( diagonal[i] ) diagonal[i] = (T)( 1./diagonal[i] );	// a zero diagonal is left as zero rather than inverted
+	}
+}
+
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::jacobiIteration( ConstPointer( T ) diagonal , ConstPointer( T2 ) b , ConstPointer( T2 ) in , Pointer( T2 ) out , bool dReciprocal ) const
+{	// One Jacobi-style update: out = in + ( b - (*this)*in ) scaled by the diagonal
+	multiply( in , out );	// out temporarily holds the product of the matrix with in
+	if( dReciprocal )	// diagonal[] already holds reciprocals, so multiply instead of divide
+#pragma omp parallel for
+		for( int i=0 ; i<rows() ; i++ ) out[i] = in[i] + ( b[i] - out[i] ) * diagonal[i];
+	else
+#pragma omp parallel for
+		for( int i=0 ; i<rows() ; i++ ) out[i] = in[i] + ( b[i] - out[i] ) / diagonal[i];
+}
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::gsIteration( ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , bool dReciprocal ) const
+{	// One sequential Gauss-Seidel sweep over all rows, updating x in place; forward selects ascending or descending row order
+	if( dReciprocal )	// diagonal[] holds reciprocals: the row residual is multiplied by it
+	{
+#define ITERATE( j )                                                                                \
+	{                                                                                               \
+		T2 _b = b[j];                                                                               \
+		const_iterator e = end( j );                                                                \
+		for( const_iterator iter = begin( j ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;  \
+		x[j] += _b * diagonal[j];                                                                   \
+	}
+		if( forward ) for( int j=0 ; j<int( rows() ) ; j++ ){ ITERATE( j ); }
+		else          for( int j=int( rows() )-1 ; j>=0 ; j-- ){ ITERATE( j ); }
+#undef ITERATE
+	}
+	else	// diagonal[] holds the diagonal values themselves: the row residual is divided by it
+	{
+#define ITERATE( j )                                                                                \
+	{                                                                                               \
+		T2 _b = b[j];                                                                               \
+		const_iterator e = end( j );                                                                \
+		for( const_iterator iter = begin( j ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;  \
+		x[j] += _b / diagonal[j];                                                                   \
+	}
+		// ITERATE( j ) computes the residual of row j using the current x and applies the correction to x[j] immediately
+		if( forward ) for( int j=0 ; j<int( rows() ) ; j++ ){ ITERATE( j ); }
+		else          for( int j=int( rows() )-1 ; j>=0 ; j-- ){ ITERATE( j ); }
+#undef ITERATE
+	}
+}
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::gsIteration( const std::vector< int >& multiColorIndices , ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool dReciprocal ) const
+{	// Relaxes, in parallel, only the rows listed in multiColorIndices; presumably these rows do not reference each other (one color) -- confirm with the caller
+	if( dReciprocal )	// diagonal[] holds reciprocals
+#pragma omp parallel for
+		for( int j=0 ; j<(int)multiColorIndices.size() ; j++ )
+		{
+			int jj = multiColorIndices[j];	// actual row index
+			T2 _b = b[jj];
+			const_iterator e = end( jj );
+			for( const_iterator iter = begin( jj ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;	// _b becomes the residual of row jj
+			x[jj] += _b * diagonal[jj];
+		}
+	else	// diagonal[] holds the diagonal values themselves
+#pragma omp parallel for
+		for( int j=0 ; j<(int)multiColorIndices.size() ; j++ )
+		{
+			int jj = multiColorIndices[j];	// actual row index
+			T2 _b = b[jj];
+			const_iterator e = end( jj );
+			for( const_iterator iter = begin( jj ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;	// _b becomes the residual of row jj
+			x[jj] += _b / diagonal[jj];
+		}
+}
+
+template< class T , class const_iterator >
+template< class T2 >
+void SparseMatrixInterface< T , const_iterator >::gsIteration( const std::vector< std::vector< int > >& multiColorIndices , ConstPointer( T ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , bool dReciprocal ) const
+{	// Multi-color relaxation: the index sets are visited one after another (order chosen by forward), the rows inside one set are relaxed in parallel
+#ifdef _MSC_VER	// __pragma is a Microsoft extension, so key on the compiler rather than on the target OS
+#define SetOMPParallel __pragma( omp parallel for )
+#else // !_MSC_VER
+#define SetOMPParallel _Pragma( "omp parallel for" )
+#endif // _MSC_VER
+	// ITERATE( indices ) relaxes every row listed in indices, updating x in place
+	if( dReciprocal )	// diagonal[] holds reciprocals: multiply
+	{
+#define ITERATE( indices )                                                                               \
+	{                                                                                                    \
+SetOMPParallel                                                                                           \
+		for( int k=0 ; k<int( indices.size() ) ; k++ )                                                   \
+		{                                                                                                \
+			int jj = indices[k];                                                                         \
+			T2 _b = b[jj];                                                                               \
+			const_iterator e = end( jj );                                                                \
+			for( const_iterator iter = begin( jj ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;  \
+			x[jj] += _b * diagonal[jj];                                                                  \
+		}                                                                                                \
+	}
+		if( forward ) for( int j=0 ; j<int( multiColorIndices.size() ) ; j++ ){ ITERATE( multiColorIndices[j] ); }
+		else for( int j=int( multiColorIndices.size() )-1 ; j>=0 ; j-- ){ ITERATE( multiColorIndices[j] ); }
+#undef ITERATE
+	}
+	else	// diagonal[] holds the diagonal values themselves: divide
+	{
+#define ITERATE( indices )                                                                               \
+	{                                                                                                    \
+SetOMPParallel                                                                                           \
+		for( int k=0 ; k<int( indices.size() ) ; k++ )                                                   \
+		{                                                                                                \
+			int jj = indices[k];                                                                         \
+			T2 _b = b[jj];                                                                               \
+			const_iterator e = end( jj );                                                                \
+			for( const_iterator iter = begin( jj ) ; iter!=e ; iter++ ) _b -= x[iter->N] * iter->Value;  \
+			x[jj] += _b / diagonal[jj];                                                                  \
+		}                                                                                                \
+	}
+		if( forward ) for( int j=0 ; j<int( multiColorIndices.size() ) ; j++ ){ ITERATE( multiColorIndices[j] ); }
+		else for( int j=int( multiColorIndices.size() )-1 ; j>=0 ; j-- ){ ITERATE( multiColorIndices[j] ); }
+#undef ITERATE
+	}
+#undef SetOMPParallel
+}
+template< class SPDFunctor , class T , typename Real , class TDotTFunctor > int SolveCG( const SPDFunctor& M , int dim , ConstPointer( T ) b , int iters , Pointer( T ) x , double eps , TDotTFunctor Dot )
+{	// Conjugate-gradient iteration for M(x)=b that refines the incoming x in place; returns the number of iterations run (0 if x already satisfies the tolerance)
+	eps *= eps;	// the tolerance is compared against squared residual norms below
+	Pointer( T ) r = AllocPointer< T >( dim );	// residual
+	Pointer( T ) d = AllocPointer< T >( dim );	// search direction
+	Pointer( T ) q = AllocPointer< T >( dim );	// q = M(d)
+	// Initial residual r = b - M(x), which also serves as the first search direction; delta_new = <r,r>
+	Real delta_new = 0 , delta_0;
+	M( ( ConstPointer( T ) )x , r );
+#pragma omp parallel for reduction( + : delta_new )
+	for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = b[i] - r[i] , delta_new += Dot( r[i] , r[i] );
+	// Remember the initial squared residual; if it is already within tolerance, free the scratch buffers and stop
+	delta_0 = delta_new;
+	if( delta_new<=eps )
+	{
+		FreePointer( r );
+		FreePointer( d );
+		FreePointer( q );
+		return 0;
+	}
+	int ii;
+	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )	// run until the squared residual has dropped to eps times its initial value
+	{
+		M( ( ConstPointer( T ) )d , q );
+		Real dDotQ = 0;
+#pragma omp parallel for reduction( + : dDotQ )
+		for( int i=0 ; i<dim ; i++ ) dDotQ += Dot( d[i] , q[i] );
+		if( !dDotQ ) break;	// alpha would divide by zero
+
+		Real alpha = delta_new / dDotQ;
+		Real delta_old = delta_new;
+		delta_new = 0;
+		if( (ii%50)==(50-1) )	// every 50th iteration the residual is recomputed from x instead of being updated incrementally
+		{
+#pragma omp parallel for
+			for( int i=0 ; i<dim ; i++ ) x[i] += (T)( d[i] * alpha );
+			M( ( ConstPointer( T ) )x , r );
+#pragma omp parallel for reduction( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i] , delta_new += Dot( r[i] , r[i] );	// x was already advanced above; stepping it a second time would leave r inconsistent with x
+		}
+		else
+#pragma omp parallel for reduction( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] -=(T)( q[i] * alpha ) , delta_new += Dot( r[i] , r[i] ) ,  x[i] += (T)( d[i] * alpha );
+
+		Real beta = delta_new / delta_old;
+#pragma omp parallel for
+		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + (T)( d[i] * beta );
+	}
+	FreePointer( r );
+	FreePointer( d );
+	FreePointer( q );
+	return ii;
+}
+template< class SPDFunctor , class Preconditioner , class T , typename Real , class TDotTFunctor > int SolveCG( const SPDFunctor& M , const Preconditioner& P , int dim , ConstPointer( T ) b , int iters , Pointer( T ) x , double eps , TDotTFunctor Dot  )
+{	// Conjugate-gradient iteration applied to P(M(x))=P(b), refining the incoming x in place; returns the number of iterations run (0 if already within tolerance)
+	eps *= eps;	// the tolerance is compared against squared residual norms below
+	Pointer( T ) r = AllocPointer< T >( dim );	// residual of the preconditioned system
+	Pointer( T ) d = AllocPointer< T >( dim );	// search direction
+	Pointer( T ) q = AllocPointer< T >( dim );	// q = P(M(d))
+	Pointer( T ) Pb = AllocPointer< T >( dim );	// P(b)
+	Pointer( T ) temp = AllocPointer< T >( dim );	// scratch buffer between M and P
+	// PM applies M and then P, writing the result to y
+	auto PM = [&] ( ConstPointer(T) x , Pointer(T) y )
+	{
+		M( x , temp );
+		P( ( ConstPointer(T) )temp , y );
+	};
+	// Initial residual r = P(b) - P(M(x)), which also serves as the first search direction; delta_new = <r,r>
+	Real delta_new = 0 , delta_0;
+	P( b , Pb );
+	PM( ( ConstPointer( T ) )x , r );
+#pragma omp parallel for reduction( + : delta_new )
+	for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = Pb[i] - r[i] , delta_new += Dot( r[i] , r[i] );
+	// Remember the initial squared residual; if it is already within tolerance, free the scratch buffers and stop
+	delta_0 = delta_new;
+	if( delta_new<=eps )
+	{
+		FreePointer( Pb );
+		FreePointer( r );
+		FreePointer( d );
+		FreePointer( q );
+		FreePointer( temp );
+		return 0;
+	}
+	int ii;
+	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )	// run until the squared residual has dropped to eps times its initial value
+	{
+		PM( ( ConstPointer( T ) )d , q );
+		Real dDotQ = 0;
+#pragma omp parallel for reduction( + : dDotQ )
+		for( int i=0 ; i<dim ; i++ ) dDotQ += Dot( d[i] , q[i] );
+		if( !dDotQ ) break;	// alpha would divide by zero
+
+		Real alpha = delta_new / dDotQ;
+		Real delta_old = delta_new;
+		delta_new = 0;
+		if( (ii%50)==(50-1) )	// every 50th iteration the residual is recomputed from x instead of being updated incrementally
+		{
+#pragma omp parallel for
+			for( int i=0 ; i<dim ; i++ ) x[i] += (T)( d[i] * alpha );
+			PM( ( ConstPointer( T ) )x , r );
+#pragma omp parallel for reduction( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] = Pb[i] - r[i] , delta_new += Dot( r[i] , r[i] );	// x was already advanced above; stepping it a second time would leave r inconsistent with x
+		}
+		else
+#pragma omp parallel for reduction( + : delta_new )
+			for( int i=0 ; i<dim ; i++ ) r[i] -=(T)( q[i] * alpha ) , delta_new += Dot( r[i] , r[i] ) ,  x[i] += (T)( d[i] * alpha );
+
+		Real beta = delta_new / delta_old;
+#pragma omp parallel for
+		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + (T)( d[i] * beta );
+	}
+	FreePointer( Pb );
+	FreePointer( r );
+	FreePointer( d );
+	FreePointer( q );
+	FreePointer( temp );
+	return ii;
+}
diff --git a/Src/SurfaceTrimmer.cpp b/Src/SurfaceTrimmer.cpp
index 62a8fee..ca1fe7a 100644
--- a/Src/SurfaceTrimmer.cpp
+++ b/Src/SurfaceTrimmer.cpp
@@ -27,39 +27,52 @@ DAMAGE.
 */
 
 #undef ARRAY_DEBUG
+#define DIMENSION 3
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <float.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif // _OPENMP
 #include <algorithm>
+#include "FEMTree.h"
+#include "MyMiscellany.h"
 #include "CmdLineParser.h"
+#include "MAT.h"
 #include "Geometry.h"
 #include "Ply.h"
-#include "MAT.h"
-#include "MyTime.h"
+#include "PointStreamData.h"
+
+MessageWriter messageWriter;
+
+
+cmdLineParameter< char* >
+	In( "in" ) ,
+	Out( "out" );
+cmdLineParameter< int >
+	Smooth( "smooth" , 5 );
+cmdLineParameter< float >
+	Trim( "trim" ) ,
+	IslandAreaRatio( "aRatio" , 0.001f );
+cmdLineReadable
+	PolygonMesh( "polygonMesh" ) ,
+	Verbose( "verbose" );
 
-cmdLineString In( "in" ) , Out( "out" );
-cmdLineInt Smooth( "smooth" , 5 );
-cmdLineFloat Trim( "trim" ) , IslandAreaRatio( "aRatio" , 0.001f );
-cmdLineReadable PolygonMesh( "polygonMesh" );
 
 cmdLineReadable* params[] =
 {
-	&In , &Out , &Trim , &PolygonMesh , &Smooth , &IslandAreaRatio
+	&In , &Out , &Trim , &PolygonMesh , &Smooth , &IslandAreaRatio , &Verbose ,
+	NULL
 };
 
 void ShowUsage( char* ex )
 {
 	printf( "Usage: %s\n" , ex );
 	printf( "\t --%s <input polygon mesh>\n" , In.name );
-	printf( "\t[--%s <trimming value>]\n" , Trim.name );
+	printf( "\t --%s <trimming value>\n" , Trim.name );	// no brackets: main() prints this usage and exits when the trim value is not set
 	printf( "\t[--%s <ouput polygon mesh>]\n" , Out.name );
 	printf( "\t[--%s <smoothing iterations>=%d]\n" , Smooth.name , Smooth.value );
 	printf( "\t[--%s <relative area of islands>=%f]\n" , IslandAreaRatio.name , IslandAreaRatio.value );
 	printf( "\t[--%s]\n" , PolygonMesh.name );
+	printf( "\t[--%s]\n" , Verbose.name );	// flag without a value
 }
 
 long long EdgeKey( int key1 , int key2 )
@@ -68,17 +81,15 @@ long long EdgeKey( int key1 , int key2 )
 	else            return ( ( (long long)key2 )<<32 ) | ( (long long)key1 );
 }
 
-template< class Real , class Vertex >
-Vertex InterpolateVertices( const Vertex& v1 , const Vertex& v2 , Real value )
+template< typename Real , typename ... VertexData >
+PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > > InterpolateVertices( const PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > >& v1 , const PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > >& v2 , Real value )
 {
-	typename Vertex::Wrapper _v1(v1) , _v2(v2);
-	if( _v1.value==_v2.value ) return Vertex( (_v1+_v2)/Real(2.) );
-
-	Real dx = ( _v1.value-value ) / ( _v1.value-_v2.value );
-	return Vertex( _v1*(1.f-dx) + _v2*dx );
+	if( std::get<0>( v1.data.data ).data==std::get<0>( v2.data.data ).data ) return (v1+v2)/Real(2.);	// equal scalar values (tuple element 0): return the average, which also avoids a zero denominator below
+	Real dx = ( std::get<0>( v1.data.data ).data-value ) / ( std::get<0>( v1.data.data ).data-std::get<0>( v2.data.data ).data );	// fraction of the way from v1 to v2 at which the linearly interpolated scalar equals value
+	return v1*(1.f-dx) + v2*dx;	// blend the two vertices with that weight
 }
-template< class Real , class Vertex >
-void SmoothValues( std::vector< Vertex >& vertices , const std::vector< std::vector< int > >& polygons )
+template< typename Real , typename ... VertexData >
+void SmoothValues( std::vector< PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > > >& vertices , const std::vector< std::vector< int > >& polygons )
 {
 	std::vector< int > count( vertices.size() );
 	std::vector< Real > sums( vertices.size() , 0 );
@@ -90,16 +101,16 @@ void SmoothValues( std::vector< Vertex >& vertices , const std::vector< std::vec
 			int j1 = j , j2 = (j+1)%sz;
 			int v1 = polygons[i][j1] , v2 = polygons[i][j2];
 			count[v1]++ , count[v2]++;
-			sums[v1] += vertices[v2].value , sums[v2] += vertices[v1].value;
+			sums[v1] += std::get< 0 >( vertices[v2].data.data ).data , sums[v2] += std::get< 0 >( vertices[v1].data.data ).data;
 		}
 	}
-	for( size_t i=0 ; i<vertices.size() ; i++ ) vertices[i].value = ( sums[i] + vertices[i].value ) / ( count[i] + 1 );
+	for( size_t i=0 ; i<vertices.size() ; i++ ) std::get< 0 >( vertices[i].data.data ).data = ( sums[i] + std::get< 0 >( vertices[i].data.data ).data ) / ( count[i] + 1 );
 }
-template< class Real , class Vertex >
+template< class Real , typename ... VertexData >
 void SplitPolygon
 	(
 	const std::vector< int >& polygon ,
-	std::vector< Vertex >& vertices , 
+	std::vector< PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > > >& vertices ,
 	std::vector< std::vector< int > >* ltPolygons , std::vector< std::vector< int > >* gtPolygons ,
 	std::vector< bool >* ltFlags , std::vector< bool >* gtFlags ,
 	std::unordered_map< long long, int >& vertexTable,
@@ -111,7 +122,7 @@ void SplitPolygon
 	int gtCount = 0;
 	for( int j=0 ; j<sz ; j++ )
 	{
-		gt[j] = ( vertices[ polygon[j] ].value>trimValue );
+		gt[j] = ( std::get<0>( vertices[ polygon[j] ].data.data ).data>trimValue );
 		if( gt[j] ) gtCount++;
 	}
 	if     ( gtCount==sz ){ if( gtPolygons ) gtPolygons->push_back( polygon ) ; if( gtFlags ) gtFlags->push_back( false ); }
@@ -170,11 +181,9 @@ void Triangulate( const std::vector< Vertex >& vertices , const std::vector< std
 	for( size_t i=0 ; i<polygons.size() ; i++ )
 		if( polygons.size()>3 )
 		{
-			MinimalAreaTriangulation< Real > mat;
-			std::vector< Point3D< Real > > _vertices( polygons[i].size() );
-			std::vector< TriangleIndex > _triangles;
+			std::vector< Point< Real , DIMENSION > > _vertices( polygons[i].size() );
 			for( int j=0 ; j<int( polygons[i].size() ) ; j++ ) _vertices[j] = vertices[ polygons[i][j] ].point;
-			mat.GetTriangulation( _vertices , _triangles );
+			std::vector< TriangleIndex > _triangles = MinimalAreaTriangulation< Real , DIMENSION >( ( ConstPointer( Point< Real , DIMENSION > ) )GetPointer( _vertices ) , _vertices.size() );
 
 			// Add the triangles to the mesh
 			size_t idx = triangles.size();
@@ -191,14 +200,14 @@ template< class Real , class Vertex >
 double PolygonArea( const std::vector< Vertex >& vertices , const std::vector< int >& polygon )
 {
 	if( polygon.size()<3 ) return 0.;
-	else if( polygon.size()==3 ) return TriangleArea( vertices[polygon[0]].point , vertices[polygon[1]].point , vertices[polygon[2]].point );
+	else if( polygon.size()==3 ) return Area( vertices[polygon[0]].point , vertices[polygon[1]].point , vertices[polygon[2]].point );
 	else
 	{
-		Point3D< Real > center;
+		Point< Real , DIMENSION > center;
 		for( size_t i=0 ; i<polygon.size() ; i++ ) center += vertices[ polygon[i] ].point;
 		center /= Real( polygon.size() );
 		double area = 0;
-		for( size_t i=0 ; i<polygon.size() ; i++ ) area += TriangleArea( center , vertices[ polygon[i] ].point , vertices[ polygon[ (i+1)%polygon.size() ] ].point );
+		for( size_t i=0 ; i<polygon.size() ; i++ ) area += Area( center , vertices[ polygon[i] ].point , vertices[ polygon[ (i+1)%polygon.size() ] ].point );
 		return area;
 	}
 }
@@ -264,42 +273,40 @@ void SetConnectedComponents( const std::vector< std::vector< int > >& polygons ,
 	components.resize( cCount );
 	for( int i=0 ; i<int(polygonRoots.size()) ; i++ ) components[ vMap[ polygonRoots[i] ] ].push_back(i);
 }
-template< class Real >
-inline Point3D< Real > CrossProduct( Point3D< Real > p1 , Point3D< Real > p2 ){ return Point3D< Real >( p1[1]*p2[2]-p1[2]*p2[1] , p1[2]*p2[0]-p1[0]*p2[2] , p1[0]*p1[1]-p1[1]*p2[0] ); }
-template< class Real >
-double TriangleArea( Point3D< Real > v1 , Point3D< Real > v2 , Point3D< Real > v3 )
-{
-	Point3D< Real > n = CrossProduct( v2-v1 , v3-v1 );
-	return sqrt( n[0]*n[0] + n[1]*n[1] + n[2]*n[2] ) / 2.;
-}
-template< class Vertex >
+template< typename ... VertexData >
 int Execute( void )
 {
+	typedef PlyVertexWithData< float , DIMENSION , MultiPointStreamData< float , PointStreamValue< float > , VertexData ... > > Vertex;
 	float min , max;
-	int paramNum = sizeof(params)/sizeof(cmdLineReadable*);
 	std::vector< Vertex > vertices;
 	std::vector< std::vector< int > > polygons;
 
-	int ft , commentNum = paramNum+2;
-	char** comments;
-	PlyReadPolygons( In.value , vertices , polygons , Vertex::ReadProperties , Vertex::ReadComponents , ft , &comments , &commentNum );
-	for( int i=0 ; i<Smooth.value ; i++ ) SmoothValues< float , Vertex >( vertices , polygons );
-	min = max = vertices[0].value;
-	for( size_t i=0 ; i<vertices.size() ; i++ ) min = std::min< float >( min , vertices[i].value ) , max = std::max< float >( max , vertices[i].value );
-	printf( "Value Range: [%f,%f]\n" , min , max );
+	int ft;
+	std::vector< std::string > comments;
+	PlyReadPolygons< Vertex >( In.value , vertices , polygons , Vertex::PlyReadProperties() , Vertex::PlyReadNum , ft , comments );
+
+	for( int i=0 ; i<Smooth.value ; i++ ) SmoothValues< float >( vertices , polygons );
+	min = max = std::get< 0 >( vertices[0].data.data ).data;
+	for( size_t i=0 ; i<vertices.size() ; i++ ) min = std::min< float >( min , std::get< 0 >( vertices[i].data.data ).data ) , max = std::max< float >( max , std::get< 0 >( vertices[i].data.data ).data );
 
 	std::unordered_map< long long, int > vertexTable;
 	std::vector< std::vector< int > > ltPolygons , gtPolygons;
 	std::vector< bool > ltFlags , gtFlags;
 
-	for( int i=0 ; i<paramNum+2 ; i++ ) comments[i+commentNum]=new char[1024];
-	sprintf( comments[commentNum++] , "Running Surface Trimmer (V5)" );
-	if(              In.set ) sprintf(comments[commentNum++],"\t--%s %s" , In.name , In.value );
-	if(             Out.set ) sprintf(comments[commentNum++],"\t--%s %s" , Out.name , Out.value );
-	if(            Trim.set ) sprintf(comments[commentNum++],"\t--%s %f" , Trim.name , Trim.value );
-	if(          Smooth.set ) sprintf(comments[commentNum++],"\t--%s %d" , Smooth.name , Smooth.value );
-	if( IslandAreaRatio.set ) sprintf(comments[commentNum++],"\t--%s %f" , IslandAreaRatio.name , IslandAreaRatio.value );
-	if(     PolygonMesh.set ) sprintf(comments[commentNum++],"\t--%s" , PolygonMesh.name );
+	messageWriter( comments , "*********************************************\n" );
+	messageWriter( comments , "*********************************************\n" );
+	messageWriter( comments , "** Running Surface Trimmer (Version %s) **\n" , VERSION );
+	messageWriter( comments , "*********************************************\n" );
+	messageWriter( comments , "*********************************************\n" );
+	char str[1024];
+	for( int i=0 ; params[i] ; i++ )
+		if( params[i]->set )
+		{
+			params[i]->writeValue( str );
+			if( strlen( str ) ) messageWriter( comments , "\t--%s %s\n" , params[i]->name , str );
+			else                messageWriter( comments , "\t--%s\n" , params[i]->name );
+		}
+	if( Verbose.set ) printf( "Value Range: [%f,%f]\n" , min , max );
 
 	double t=Time();
 	for( size_t i=0 ; i<polygons.size() ; i++ ) SplitPolygon( polygons[i] , vertices , &ltPolygons , &gtPolygons , &ltFlags , &gtFlags , vertexTable , Trim.value );
@@ -355,28 +362,40 @@ int Execute( void )
 	}
 
 	RemoveHangingVertices( vertices , gtPolygons );
-	sprintf( comments[commentNum++] , "#Trimmed In: %9.1f (s)" , Time()-t );
-	if( Out.set ) PlyWritePolygons( Out.value , vertices , gtPolygons , Vertex::WriteProperties , Vertex::WriteComponents , ft , comments , commentNum );
-
+	char comment[1024];
+	sprintf( comment , "#Trimmed In: %9.1f (s)" , Time()-t );
+	comments.push_back( comment );
+	if( Out.set )
+		if( !PlyWritePolygons< Vertex >( Out.value , vertices , gtPolygons , Vertex::PlyWriteProperties() , Vertex::PlyWriteNum , ft , comments ) )
+			ERROR_OUT( "Could not write mesh to: %s" , Out.value );
+	
 	return EXIT_SUCCESS;
 }
 int main( int argc , char* argv[] )
 {
-	int paramNum = sizeof(params)/sizeof(cmdLineReadable*);
-	cmdLineParse( argc-1 , &argv[1] , paramNum , params , 0 );
+	cmdLineParse( argc-1 , &argv[1] , params );	// params is NULL-terminated, so no explicit count is passed
+	messageWriter.echoSTDOUT = Verbose.set;	// messages are echoed to stdout only when --verbose is given
 
 	if( !In.set || !Trim.set )
 	{
 		ShowUsage( argv[0] );
 		return EXIT_FAILURE;
 	}
-	bool readFlags[ PlyColorAndValueVertex< float >::ReadComponents ];
-	if( !PlyReadHeader( In.value , PlyColorAndValueVertex< float >::ReadProperties , PlyColorAndValueVertex< float >::ReadComponents , readFlags ) ) fprintf( stderr , "[ERROR] Failed to read ply header: %s\n" , In.value ) , exit( 0 );
+	typedef MultiPointStreamData< float , PointStreamValue< float > , PointStreamNormal< float , DIMENSION > , PointStreamColor< float > > VertexData;	// superset of the per-vertex data probed for: value, normal, color
+	typedef PlyVertexWithData< float , DIMENSION , VertexData > Vertex;
+	bool readFlags[ Vertex::PlyReadNum ];
+	if( !PlyReadHeader( In.value , Vertex::PlyReadProperties() , Vertex::PlyReadNum , readFlags ) ) ERROR_OUT( "Failed to read ply header: %s" , In.value );
 
-	bool hasValue = readFlags[3];
-	bool hasColor = ( readFlags[4] || readFlags[7] ) && ( readFlags[5] || readFlags[8] ) && ( readFlags[6] || readFlags[9] );
+	bool hasValue  = VertexData::ValidPlyReadProperties< 0 >( readFlags + DIMENSION );	// presumably the first DIMENSION flags belong to the position -- TODO confirm
+	bool hasNormal = VertexData::ValidPlyReadProperties< 1 >( readFlags + DIMENSION );
+	bool hasColor  = VertexData::ValidPlyReadProperties< 2 >( readFlags + DIMENSION );
 
-	if( !hasValue ) fprintf( stderr , "[ERROR] Ply file does not contain values\n" ) , exit( 0 );
-	if( hasColor ) return Execute< PlyColorAndValueVertex< float > >();
-	else           return Execute< PlyValueVertex< float > >();
+	if( !hasValue ) ERROR_OUT( "Ply file does not contain values" );
+	// Dispatch to the Execute instantiation whose extra vertex data matches what the header provides
+	if( hasColor )
+		if( hasNormal ) return Execute< PointStreamNormal< float , DIMENSION > , PointStreamColor< float > >();
+		else            return Execute<                                          PointStreamColor< float > >();
+	else
+		if( hasNormal ) return Execute< PointStreamNormal< float , DIMENSION >                             >();
+		else            return Execute<                                                                    >();
 }
diff --git a/Src/VoxelCompare.cpp b/Src/VoxelCompare.cpp
new file mode 100644
index 0000000..c3a173a
--- /dev/null
+++ b/Src/VoxelCompare.cpp
@@ -0,0 +1,124 @@
+/*
+Copyright (c) 2016, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#undef FAST_COMPILE				// If enabled, only a single version of the reconstruction code is compiled
+#undef ARRAY_DEBUG				// If enabled, array access is tested for validity
+#define MAX_MEMORY_GB 15		// If non-zero, the maximum memory to be used by the application
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+#include "MyMiscellany.h"
+#include "CmdLineParser.h"
+#include "Array.h"
+
+cmdLineParameterArray< char* , 2 >
+	In( "in" );
+cmdLineParameter< char* >
+	Out( "out" );
+cmdLineParameter< float >
+	Scale( "scale" , 1.f );
+
+cmdLineReadable* params[] =
+{
+	&In , &Out , &Scale ,
+	NULL
+};
+
+void ShowUsage( char* ex )
+{	// Writes the command-line synopsis to stdout; options shown in brackets may be omitted
+	fprintf( stdout , "Usage: %s\n" , ex );
+	fprintf( stdout , "\t --%s <input voxel grid1 , input voxel grid 2>\n" , In.name );
+	fprintf( stdout , "\t[--%s <output voxel grid>]\n" , Out.name );
+	fprintf( stdout , "\t[--%s <output scale>=%f]\n" , Scale.name , Scale.value );
+}
+
+
+int main( int argc , char* argv[] )
+{	// Compares two cubic voxel grids of equal resolution: prints their relative L1 / L2 differences and optionally writes the scaled difference grid
+	cmdLineParse( argc-1 , &argv[1] , params );
+	if( !In.set )
+	{
+		ShowUsage( argv[0] );
+		return EXIT_FAILURE;
+	}
+	// Reads a grid stored as one int (the resolution) followed by res^3 floats; every failure ends the program with a non-zero exit status
+	auto ReadVoxel = []( const char* fileName , int& res )
+	{
+		FILE* fp = fopen( fileName , "rb" );
+		if( !fp ) fprintf( stderr , "[ERROR] Failed to read voxel: %s\n" , fileName ) , exit( EXIT_FAILURE );
+		if( fread( &res , sizeof(int) , 1 , fp )!=1 ) fprintf( stderr , "[ERROR] Failed to read resolution from file: %s\n" , fileName ) , exit( EXIT_FAILURE );
+		Pointer( float ) v = AllocPointer< float >( res*res*res );	// NOTE(review): res*res*res is evaluated in int and res is not validated -- confirm the resolutions in use cannot overflow
+		if( !v ) fprintf( stderr , "[ERROR] Failed to allocate voxel grid: %d x %d x %d\n" , res , res , res ) , exit( EXIT_FAILURE );
+		if( fread( v , sizeof(float) , res*res*res , fp )!=res*res*res ) fprintf( stderr , "[ERROR] Failed to read voxel values from file: %s\n" , fileName ) , exit( EXIT_FAILURE );
+		fclose( fp );
+		return v;
+	};
+	// Load both grids and insist that their resolutions agree
+	int res;
+	Pointer( float ) v1 ; Pointer( float ) v2;
+	{
+		int res1 , res2;
+		v1 = ReadVoxel( In.values[0] , res1 );
+		v2 = ReadVoxel( In.values[1] , res2 );
+		if( res1!=res2 ) fprintf( stderr , "[ERROR] Voxel resolutions don't match: %d x %d\n" , res1 , res2 ) , exit( EXIT_FAILURE );
+		res = res1;
+	}
+	// Accumulate the L1 / L2 norms of the difference and of each grid
+	double l1Error = 0 , l2Error = 0;
+	double l1Norm1 = 0 , l1Norm2 = 0 , l2Norm1 = 0 , l2Norm2 = 0;
+#pragma omp parallel for reduction ( + : l1Error , l2Error , l1Norm1 , l1Norm2 , l2Norm1 , l2Norm2 )
+	for( int i=0 ; i<res*res*res ; i++ )
+	{
+		l1Error += fabs( v1[i] - v2[i] ) , l1Norm1 += fabs( v1[i] ) , l1Norm2 += fabs( v2[i] );
+		l2Error += ( v1[i] - v2[i] ) * ( v1[i] - v2[i] ) , l2Norm1 += v1[i] * v1[i] , l2Norm2 += v2[i] * v2[i];
+	}
+	l1Error /= res*res*res , l1Norm1 /= res*res*res , l1Norm2 /= res*res*res;	// turn the sums into per-voxel means
+	l2Error /= res*res*res , l2Norm1 /= res*res*res , l2Norm2 /= res*res*res;
+	l2Error = sqrt( l2Error ) , l2Norm1 = sqrt( l2Norm1 ) , l2Norm2 = sqrt( l2Norm2 );	// root-mean-square values
+	printf(  "L1 / L2 differences: %g %g\n" , 2*l1Error / ( l1Norm1 + l1Norm2 ) , 2*l2Error / ( l2Norm1 + l2Norm2 ) );	// errors relative to the average of the two grids' norms
+
+	if( Out.set )
+	{
+		if( Scale.value<0 ) Scale.value = 1. / ( ( l1Norm1 + l1Norm2 ) / 2 );	// a negative scale selects normalization by the average mean-absolute value of the two grids
+#pragma omp parallel for
+		for( int i=0 ; i<res*res*res ; i++ ) v1[i] = ( v1[i] - v2[i] ) * Scale.value;	// v1 is overwritten with the scaled difference
+
+		FILE* fp = fopen( Out.value , "wb" );
+		if( !fp ) fprintf( stderr , "[ERROR] Failed to open file for writing: %s\n" , Out.value ) , exit( EXIT_FAILURE );
+		// Same layout as the input: the resolution followed by the voxel values; a short write is reported instead of being ignored
+		if( fwrite( &res , sizeof(int) , 1 , fp )!=1 ) fprintf( stderr , "[ERROR] Failed to write voxel resolution\n" ) , fclose( fp ) , exit( EXIT_FAILURE );
+		if( fwrite( v1 , sizeof(float) , res*res*res , fp )!=res*res*res ) fprintf( stderr , "[ERROR] Failed to write voxel values\n" ) , fclose( fp ) , exit( EXIT_FAILURE );
+		fclose( fp );
+	}
+	FreePointer( v1 );
+	FreePointer( v2 );
+
+	return EXIT_SUCCESS;
+}
diff --git a/Src/Window.h b/Src/Window.h
new file mode 100644
index 0000000..d482205
--- /dev/null
+++ b/Src/Window.h
@@ -0,0 +1,435 @@
+/*
+Copyright (c) 2016, Michael Kazhdan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+#ifndef WINDOW_INCLUDED
+#define WINDOW_INCLUDED
+
+#include <functional>
+#include "Allocator.h"
+#include "Array.h"
+
+//////////////////////////////////////////////////////////
+// Some basic functionality for integer parameter packs //
+//////////////////////////////////////////////////////////
+
+// A wrapper class for passing unsigned integer parameter packs
+template< unsigned int  ... Values > struct UIntPack{};
+
+// Specialization for a non-empty pack: splits the pack into its leading value (First) and the pack of trailing values (Rest).
+// (A pack holding exactly one value is handled by the more specialized single-value version below.)
+template< unsigned int _Value , unsigned int ... _Values > struct UIntPack< _Value , _Values ... >
+{
+	static const unsigned int First = _Value;
+	typedef UIntPack< _Values ... > Rest;
+
+	// The number of values in the pack
+	static const unsigned int Size = 1 + sizeof ... ( _Values );
+	// The packs obtained by adding a value to the back / front of this pack
+	template< unsigned int __Value > using  Append = UIntPack< _Value , _Values ... , __Value >;
+	template< unsigned int __Value > using Prepend = UIntPack< __Value , _Value , _Values ... >;
+	// The values of the pack as an array (defined out of class)
+	static const unsigned int Values[];
+	// The smallest / largest value in the pack
+	static constexpr unsigned int Min( void ){ return _Value < Rest::Min() ? _Value : Rest::Min(); }
+	static constexpr unsigned int Max( void ){ return _Value > Rest::Max() ? _Value : Rest::Max(); }
+
+	// Component-wise arithmetic and comparison against a second pack (only specialized for UIntPack arguments)
+	template< typename T > struct Plus{};
+	template< typename T > struct Minus{};
+	template< typename T > struct Compare{};
+	// Plus::type / Minus::type: the pack of component-wise sums / differences (computed in unsigned arithmetic)
+	template< unsigned int __Value , unsigned int ... __Values > struct Plus < UIntPack< __Value , __Values ... > >{ typedef typename Rest::template Plus < UIntPack< __Values ... > >::type::template Prepend< _Value + __Value > type; };
+	template< unsigned int __Value , unsigned int ... __Values > struct Minus< UIntPack< __Value , __Values ... > >{ typedef typename Rest::template Minus< UIntPack< __Values ... > >::type::template Prepend< _Value - __Value > type; };
+	// NotEqual holds if any component differs; every other relation holds only if it holds for all components
+	template< unsigned int __Value , unsigned int ... __Values > struct Compare< UIntPack< __Value , __Values ... > >
+	{
+		static const bool              Equal = _Value==__Value && Rest::template Compare< UIntPack< __Values ... > >::             Equal;
+		static const bool           NotEqual = _Value!=__Value || Rest::template Compare< UIntPack< __Values ... > >::          NotEqual;
+		static const bool    LessThan        = _Value< __Value && Rest::template Compare< UIntPack< __Values ... > >::   LessThan       ;
+		static const bool    LessThanOrEqual = _Value<=__Value && Rest::template Compare< UIntPack< __Values ... > >::   LessThanOrEqual;
+		static const bool GreaterThan        = _Value> __Value && Rest::template Compare< UIntPack< __Values ... > >::GreaterThan       ;
+		static const bool GreaterThanOrEqual = _Value>=__Value && Rest::template Compare< UIntPack< __Values ... > >::GreaterThanOrEqual;
+	};
+
+	// Writes the values to the stream, separated by single spaces (the values are unsigned, hence "%u")
+	static void Print( FILE* fp=stdout , bool leadingSpace=false ){ if( leadingSpace ) fprintf( fp , " " ) ; fprintf( fp , "%u" , _Value ) ; Rest::Print( fp , true ); }
+
+	// Returns the I-th value of the pack
+	template< unsigned int I > constexpr static typename std::enable_if< I==0 , unsigned int >::type Get( void ){ return _Value; }
+	template< unsigned int I > constexpr static typename std::enable_if< I!=0 , unsigned int >::type Get( void ){ return Rest::template Get< I-1 >(); }
+
+	// Object-level versions of the Compare relations.
+	// As with Compare::NotEqual, two packs are different as soon as one component differs, so != combines with ||.
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator <  ( UIntPack< __Value , __Values ... > ) const { return _Value< __Value && Rest()< UIntPack< __Values ... >(); }
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator <= ( UIntPack< __Value , __Values ... > ) const { return _Value<=__Value && Rest()<=UIntPack< __Values ... >(); }
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator >  ( UIntPack< __Value , __Values ... > ) const { return _Value> __Value && Rest()> UIntPack< __Values ... >(); }
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator >= ( UIntPack< __Value , __Values ... > ) const { return _Value>=__Value && Rest()>=UIntPack< __Values ... >(); }
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator == ( UIntPack< __Value , __Values ... > ) const { return _Value==__Value && Rest()==UIntPack< __Values ... >(); }
+	template< unsigned int __Value , unsigned int ... __Values > constexpr bool operator != ( UIntPack< __Value , __Values ... > ) const { return _Value!=__Value || Rest()!=UIntPack< __Values ... >(); }
+};
+
+// Specialization for a pack holding a single value: terminates the recursion of the general case
+template< unsigned int _Value > struct UIntPack< _Value >
+{
+	static const unsigned int First = _Value;
+
+	static const unsigned int Size = 1;
+	template< unsigned int __Value > using  Append = UIntPack< _Value , __Value >;
+	template< unsigned int __Value > using Prepend = UIntPack< __Value , _Value >;
+	// The single value of the pack as an array (defined out of class)
+	static const unsigned int Values[];
+	static constexpr unsigned int Min( void ){ return _Value; }
+	static constexpr unsigned int Max( void ){ return _Value; }
+
+	// Arithmetic and comparison against a second single-value pack
+	template< typename T > struct Plus{};
+	template< typename T > struct Minus{};
+	template< typename T > struct Compare{};
+	template< unsigned int __Value > struct Plus < UIntPack< __Value > >{ typedef UIntPack< _Value + __Value > type; };
+	template< unsigned int __Value > struct Minus< UIntPack< __Value > >{ typedef UIntPack< _Value - __Value > type; };
+	template< unsigned int __Value > struct Compare< UIntPack< __Value > >
+	{
+		static const bool              Equal = _Value==__Value;
+		static const bool           NotEqual = _Value!=__Value;
+		static const bool    LessThan        = _Value< __Value;
+		static const bool    LessThanOrEqual = _Value<=__Value;
+		static const bool GreaterThan        = _Value> __Value;
+		static const bool GreaterThanOrEqual = _Value>=__Value;
+	};
+
+	// Writes the value to the stream, optionally preceded by a space (the value is unsigned, hence "%u")
+	static void Print( FILE* fp=stdout , bool leadingSpace=false ){ if( leadingSpace ) fprintf( fp , " " ) ; fprintf( fp , "%u" , _Value ); }
+	// Returns the value; only index 0 is valid
+	template< unsigned int I > constexpr static unsigned int Get( void ){ static_assert( I==0 , "[ERROR] UIntPack< Value >::Get called with non-zero index" ) ; return _Value; }
+
+	template< unsigned int __Value > constexpr bool operator <  ( UIntPack< __Value > ) const { return _Value< __Value; }
+	template< unsigned int __Value > constexpr bool operator <= ( UIntPack< __Value > ) const { return _Value<=__Value; }
+	template< unsigned int __Value > constexpr bool operator >  ( UIntPack< __Value > ) const { return _Value> __Value; }
+	template< unsigned int __Value > constexpr bool operator >= ( UIntPack< __Value > ) const { return _Value>=__Value; }
+	template< unsigned int __Value > constexpr bool operator == ( UIntPack< __Value > ) const { return _Value==__Value; }
+	template< unsigned int __Value > constexpr bool operator != ( UIntPack< __Value > ) const { return _Value!=__Value; }
+};
+// Out-of-class definitions of the static arrays listing the values of a pack
+template< unsigned int V , unsigned int ... Vs >
+const unsigned int UIntPack< V , Vs ... >::Values[] = { V , Vs ... };
+template< unsigned int V >
+const unsigned int UIntPack< V >::Values[] = { V };
+
+// Component-wise sum of two packs: the arguments only carry types, the result is a default-constructed object of the summed pack type
+template< unsigned int ... V1 , unsigned int ... V2 >
+typename UIntPack< V1 ... >::template Plus < UIntPack< V2 ... > >::type operator + ( UIntPack< V1 ... > , UIntPack< V2 ... > )
+{
+	typedef typename UIntPack< V1 ... >::template Plus < UIntPack< V2 ... > >::type Sum;
+	return Sum();
+}
+
+// Component-wise difference of two packs, analogous to operator +
+template< unsigned int ... V1 , unsigned int ... V2 >
+typename UIntPack< V1 ... >::template Minus< UIntPack< V2 ... > >::type operator - ( UIntPack< V1 ... > , UIntPack< V2 ... > )
+{
+	typedef typename UIntPack< V1 ... >::template Minus< UIntPack< V2 ... > >::type Difference;
+	return Difference();
+}
+
+// A wrapper class for passing signed integer parameter packs
+template< int ... Values > struct IntPack{};
+
+// Specialization for a non-empty pack: splits the pack into its leading value (First) and the pack of trailing values (Rest).
+// (A pack holding exactly one value is handled by the more specialized single-value version below.)
+template< int _Value , int ... _Values > struct IntPack< _Value , _Values ... >
+{
+	static const int First = _Value;
+	typedef IntPack< _Values ... > Rest;
+
+	// The number of values in the pack
+	static const unsigned int Size = 1 + sizeof ... ( _Values );
+	// The packs obtained by adding a value to the back / front of this pack
+	template< int __Value > using  Append = IntPack< _Value , _Values ... , __Value >;
+	template< int __Value > using Prepend = IntPack< __Value , _Value , _Values ... >;
+	// The values of the pack as an array (defined out of class)
+	static const int Values[];
+	// The smallest / largest value in the pack (Rest::Min and Rest::Max are functions and have to be invoked)
+	static constexpr int Min( void ){ return _Value < Rest::Min() ? _Value : Rest::Min(); }
+	static constexpr int Max( void ){ return _Value > Rest::Max() ? _Value : Rest::Max(); }
+
+	// Component-wise arithmetic and comparison against a second pack (only specialized for IntPack arguments)
+	template< typename T > struct Plus{};
+	template< typename T > struct Minus{};
+	template< typename T > struct Compare{};
+	// Plus::type / Minus::type: the pack of component-wise sums / differences
+	template< int __Value , int ... __Values > struct Plus < IntPack< __Value , __Values ... > >{ typedef typename Rest::template Plus < IntPack< __Values ... > >::type::template Prepend< _Value + __Value > type; };
+	template< int __Value , int ... __Values > struct Minus< IntPack< __Value , __Values ... > >{ typedef typename Rest::template Minus< IntPack< __Values ... > >::type::template Prepend< _Value - __Value > type; };
+	// NotEqual holds if any component differs; every other relation holds only if it holds for all components
+	template< int __Value , int ... __Values > struct Compare< IntPack< __Value , __Values ... > >
+	{
+		static const bool              Equal = _Value==__Value && Rest::template Compare< IntPack< __Values ... > >::             Equal;
+		static const bool           NotEqual = _Value!=__Value || Rest::template Compare< IntPack< __Values ... > >::          NotEqual;
+		static const bool    LessThan        = _Value< __Value && Rest::template Compare< IntPack< __Values ... > >::   LessThan       ;
+		static const bool    LessThanOrEqual = _Value<=__Value && Rest::template Compare< IntPack< __Values ... > >::   LessThanOrEqual;
+		static const bool GreaterThan        = _Value> __Value && Rest::template Compare< IntPack< __Values ... > >::GreaterThan       ;
+		static const bool GreaterThanOrEqual = _Value>=__Value && Rest::template Compare< IntPack< __Values ... > >::GreaterThanOrEqual;
+	};
+
+	// Writes the values to the stream, separated by single spaces
+	static void Print( FILE* fp=stdout , bool leadingSpace=false ){ if( leadingSpace ) fprintf( fp , " " ) ; fprintf( fp , "%d" , _Value ) ; Rest::Print( fp , true ); }
+
+	// Returns the I-th value of the pack
+	// NOTE(review): the return type is unsigned int although the pack stores ints, so negative values are converted -- confirm whether int was intended
+	template< unsigned int I > constexpr static typename std::enable_if< I==0 , unsigned int >::type Get( void ){ return _Value; }
+	template< unsigned int I > constexpr static typename std::enable_if< I!=0 , unsigned int >::type Get( void ){ return Rest::template Get< I-1 >(); }
+
+	// Object-level versions of the Compare relations.
+	// As with Compare::NotEqual, two packs are different as soon as one component differs, so != combines with ||.
+	template< int __Value , int ... __Values > constexpr bool operator <  ( IntPack< __Value , __Values ... > ) const { return _Value< __Value && Rest()< IntPack< __Values ... >(); }
+	template< int __Value , int ... __Values > constexpr bool operator <= ( IntPack< __Value , __Values ... > ) const { return _Value<=__Value && Rest()<=IntPack< __Values ... >(); }
+	template< int __Value , int ... __Values > constexpr bool operator >  ( IntPack< __Value , __Values ... > ) const { return _Value> __Value && Rest()> IntPack< __Values ... >(); }
+	template< int __Value , int ... __Values > constexpr bool operator >= ( IntPack< __Value , __Values ... > ) const { return _Value>=__Value && Rest()>=IntPack< __Values ... >(); }
+	template< int __Value , int ... __Values > constexpr bool operator == ( IntPack< __Value , __Values ... > ) const { return _Value==__Value && Rest()==IntPack< __Values ... >(); }
+	template< int __Value , int ... __Values > constexpr bool operator != ( IntPack< __Value , __Values ... > ) const { return _Value!=__Value || Rest()!=IntPack< __Values ... >(); }
+};
+
+// Specialization for a pack holding a single value: terminates the recursion of the general case
+template< int _Value > struct IntPack< _Value >
+{
+	static const int First = _Value;
+
+	static const unsigned int Size = 1;
+	template< int __Value > using  Append = IntPack< _Value , __Value >;
+	template< int __Value > using Prepend = IntPack< __Value , _Value >;
+	// The single value of the pack as an array (defined out of class)
+	static const int Values[];
+	static constexpr int Min( void ){ return _Value; }
+	static constexpr int Max( void ){ return _Value; }
+
+	// Arithmetic and comparison against a second single-value pack
+	template< typename T > struct Plus{};
+	template< typename T > struct Minus{};
+	template< typename T > struct Compare{};
+	template< int __Value > struct Plus < IntPack< __Value > >{ typedef IntPack< _Value + __Value > type; };
+	template< int __Value > struct Minus< IntPack< __Value > >{ typedef IntPack< _Value - __Value > type; };
+	template< int __Value > struct Compare< IntPack< __Value > >
+	{
+		static const bool              Equal = _Value==__Value;
+		static const bool           NotEqual = _Value!=__Value;
+		static const bool    LessThan        = _Value< __Value;
+		static const bool    LessThanOrEqual = _Value<=__Value;
+		static const bool GreaterThan        = _Value> __Value;
+		static const bool GreaterThanOrEqual = _Value>=__Value;
+	};
+
+	// Writes the value to the stream, optionally preceded by a space
+	static void Print( FILE* fp=stdout , bool leadingSpace=false ){ if( leadingSpace ) fprintf( fp , " " ) ; fprintf( fp , "%d" , _Value ); }
+	// Returns the value; only index 0 is valid
+	// NOTE(review): the return type is unsigned int although the pack stores an int -- confirm whether int was intended
+	template< unsigned int I > constexpr static unsigned int Get( void ){ static_assert( I==0 , "[ERROR] IntPack< Value >::Get called with non-zero index" ) ; return _Value; }
+
+	template< int __Value > constexpr bool operator <  ( IntPack< __Value > ) const { return _Value< __Value; }
+	template< int __Value > constexpr bool operator <= ( IntPack< __Value > ) const { return _Value<=__Value; }
+	template< int __Value > constexpr bool operator >  ( IntPack< __Value > ) const { return _Value> __Value; }
+	template< int __Value > constexpr bool operator >= ( IntPack< __Value > ) const { return _Value>=__Value; }
+	template< int __Value > constexpr bool operator == ( IntPack< __Value > ) const { return _Value==__Value; }
+	template< int __Value > constexpr bool operator != ( IntPack< __Value > ) const { return _Value!=__Value; }
+};
+// Out-of-class definitions of the static arrays listing the values of a pack
+template< int V , int ... Vs >
+const int IntPack< V , Vs ... >::Values[] = { V , Vs ... };
+template< int V >
+const int IntPack< V >::Values[] = { V };
+
+// Component-wise sum of two packs: the arguments only carry types, the result is a default-constructed object of the summed pack type
+template< int ... V1 , int ... V2 >
+typename IntPack< V1 ... >::template Plus < IntPack< V2 ... > >::type operator + ( IntPack< V1 ... > , IntPack< V2 ... > )
+{
+	typedef typename IntPack< V1 ... >::template Plus < IntPack< V2 ... > >::type Sum;
+	return Sum();
+}
+
+// Component-wise difference of two packs, analogous to operator +
+template< int ... V1 , int ... V2 >
+typename IntPack< V1 ... >::template Minus< IntPack< V2 ... > >::type operator - ( IntPack< V1 ... > , IntPack< V2 ... > )
+{
+	typedef typename IntPack< V1 ... >::template Minus< IntPack< V2 ... > >::type Difference;
+	return Difference();
+}
+
+///////////////////////////
+// The isotropic variant //
+///////////////////////////
+// Builds the pack holding Dim copies of Value by appending one more Value to the pack for Dim-1
+template< unsigned int Dim , unsigned int Value >
+struct _IsotropicUIntPack
+{
+	using type = typename _IsotropicUIntPack< Dim-1 , Value >::type::template Append< Value >;
+};
+// One dimension: the recursion ends with the single-value pack
+template< unsigned int Value >
+struct _IsotropicUIntPack< 1 , Value >
+{
+	using type = UIntPack< Value >;
+};
+// Zero dimensions: the empty pack
+template< unsigned int Value >
+struct _IsotropicUIntPack< 0 , Value >
+{
+	using type = UIntPack< >;
+};
+// Short-hands for the isotropic pack and for the all-zeros pack
+template< unsigned int Dim , unsigned int Value > using IsotropicUIntPack = typename _IsotropicUIntPack< Dim , Value >::type;
+template< unsigned int Dim > using ZeroUIntPack = IsotropicUIntPack< Dim , 0 >;
+
+// Builds the signed pack holding Dim copies of Value by appending one more Value to the pack for Dim-1.
+// The recursion has to stay within _IsotropicIntPack: going through _IsotropicUIntPack would produce a UIntPack instead of an IntPack.
+template< int Dim , int Value > struct _IsotropicIntPack             { typedef typename _IsotropicIntPack< Dim-1 , Value >::type::template Append< Value > type; };
+// One dimension: the recursion ends with the single-value pack
+template<           int Value > struct _IsotropicIntPack< 1 , Value >{ typedef IntPack< Value > type; };
+// Zero dimensions: the empty pack
+template<           int Value > struct _IsotropicIntPack< 0 , Value >{ typedef IntPack< > type; };
+// Short-hands for the isotropic pack and for the all-zeros pack
+template< int Dim , int Value > using IsotropicIntPack = typename _IsotropicIntPack< Dim , Value >::type;
+template< int Dim > using ZeroIntPack = IsotropicIntPack< Dim , 0 >;
+/////////////////////////////
+// And now for the windows //
+/////////////////////////////
+// Primary templates: only the UIntPack specializations below provide members
+template< typename T > struct WindowSize{};
+template< typename T1 , typename T2 > struct WindowIndex{};
+
+// WindowSize::Size is the product of the resolutions in the pack
+template< unsigned int Res , unsigned int ... Ress >
+struct WindowSize< UIntPack< Res , Ress ... > >
+{
+	static const unsigned int Size = Res * WindowSize< UIntPack< Ress ... > >::Size;
+};
+template< unsigned int Res >
+struct WindowSize< UIntPack< Res > >
+{
+	static const unsigned int Size = Res;
+};
+
+// WindowIndex::Index is the compile-time linear offset of the multi-index ( Idx , Idxs ... ) within a window of
+// resolution ( Res , Ress ... ): each index is scaled by the size of the window spanned by the trailing resolutions
+template< unsigned int Res , unsigned int ... Ress , unsigned int Idx , unsigned int ... Idxs >
+struct WindowIndex< UIntPack< Res , Ress ... > , UIntPack< Idx , Idxs ... > >
+{
+	static const unsigned int Index = Idx * WindowSize< UIntPack< Ress ... > >::Size + WindowIndex< UIntPack< Ress ... > , UIntPack< Idxs ... > >::Index;
+};
+template< unsigned int Res , unsigned int Idx >
+struct WindowIndex< UIntPack< Res > , UIntPack< Idx > >
+{
+	static const unsigned int Index = Idx;
+};
+
+// Run-time counterpart of WindowIndex: converts the multi-index idx[] into a linear offset within a window of
+// resolution ( Res , Ress ... ). The leading index is scaled by the size of the trailing window and the
+// remaining indices are resolved recursively.
+template< unsigned int Res , unsigned int ... Ress >
+typename std::enable_if< (sizeof...(Ress)!=0) , unsigned int >::type GetWindowIndex( UIntPack< Res , Ress ... > , const unsigned int idx[] )
+{
+	typedef UIntPack< Ress ... > Trailing;
+	return idx[0] * WindowSize< Trailing >::Size + GetWindowIndex( Trailing() , idx+1 );
+}
+// One-dimensional window: the offset is the index itself
+template< unsigned int Res >
+unsigned int GetWindowIndex( UIntPack< Res > , const unsigned int idx[] )
+{
+	return idx[0];
+}
+
+// The same conversion for signed multi-indices
+template< unsigned int Res , unsigned int ... Ress >
+typename std::enable_if< (sizeof...(Ress)!=0) , unsigned int >::type GetWindowIndex( UIntPack< Res , Ress ... > , const int idx[] )
+{
+	typedef UIntPack< Ress ... > Trailing;
+	return idx[0] * WindowSize< Trailing >::Size + GetWindowIndex( Trailing() , idx+1 );
+}
+template< unsigned int Res >
+unsigned int GetWindowIndex( UIntPack< Res > , const int idx[] )
+{
+	return idx[0];
+}
+
+// Primary templates of the window types; members are only provided by the specializations taking a UIntPack of resolutions
+template< class Data , class Pack > struct ConstWindowSlice{};	// read-only view
+template< class Data , class Pack > struct WindowSlice{};		// read/write view
+template< class Data , class Pack > struct StaticWindow{};		// window with in-object storage
+template< class Data , class Pack > struct DynamicWindow{};		// window with allocated storage
+
+
+// Read-only view of a multi-dimensional window of resolution ( Ress ... ) stored contiguously behind a pointer
+template< class Data , unsigned int ... Ress >
+struct ConstWindowSlice< Data , UIntPack< Ress ... > >
+{
+	using Pack = UIntPack< Ress ... >;
+	using data_type = Data;
+	using data_reference_type = const Data&;
+	using const_data_reference_type = const Data&;
+
+	// The total number of entries in the window
+	static const unsigned int Size = WindowSize< Pack >::Size;
+
+	// The (read-only) entries the slice refers to
+	ConstPointer( Data ) data;
+
+	ConstWindowSlice( Pointer( Data ) d ) : data(d) { ; }
+	ConstWindowSlice( ConstPointer( Data ) d ) : data(d) { ; }
+
+	// Returns the sub-slice obtained by fixing the leading index
+	ConstWindowSlice< Data , typename Pack::Rest > operator[]( int idx ) const
+	{
+		typedef typename Pack::Rest SubPack;
+		return ConstWindowSlice< Data , SubPack >( data + WindowSize< SubPack >::Size * idx );
+	}
+
+	// Returns the entry addressed by a full multi-index
+	data_reference_type operator()( const          int idx[sizeof...(Ress)] ) const
+	{
+		return data[ GetWindowIndex( UIntPack< Ress ... >() , idx ) ];
+	}
+	data_reference_type operator()( const unsigned int idx[sizeof...(Ress)] ) const
+	{
+		return data[ GetWindowIndex( UIntPack< Ress ... >() , idx ) ];
+	}
+};
+// Read-only view of a one-dimensional window of resolution Res
+template< class Data , unsigned int Res >
+struct ConstWindowSlice< Data , UIntPack< Res > >
+{
+	using Pack = UIntPack< Res >;
+	using data_type = Data;
+	using data_reference_type = const Data&;
+	using const_data_reference_type = const Data&;
+
+	// The number of entries in the window
+	static const unsigned int Size = Res;
+
+	// The (read-only) entries the slice refers to
+	ConstPointer( Data ) data;
+
+	ConstWindowSlice( Pointer( Data ) d ) : data(d) { ; }
+	ConstWindowSlice( ConstPointer( Data ) d ) : data(d) { ; }
+
+	// Returns the idx-th entry
+	inline data_reference_type operator[]( int idx ) const
+	{
+		return data[idx];
+	}
+
+	// Returns the entry addressed by a one-element multi-index
+	data_reference_type operator()( const          int idx[1] ) const
+	{
+		return data[ idx[0] ];
+	}
+	data_reference_type operator()( const unsigned int idx[1] ) const
+	{
+		return data[ idx[0] ];
+	}
+};
+// Read/write view of a multi-dimensional window of resolution ( Ress ... ) stored contiguously behind a pointer
+template< class Data , unsigned int ... Ress >
+struct WindowSlice< Data , UIntPack< Ress ... > >
+{
+	typedef UIntPack< Ress ... > Pack;
+	// The total number of entries in the window
+	static const unsigned int Size = WindowSize< Pack >::Size;
+	typedef Data data_type;
+	typedef Data& data_reference_type;
+	typedef const Data& const_data_reference_type;
+	WindowSlice( Pointer( Data ) d ) : data(d) { ; }
+	// Returns the (writable) sub-slice obtained by fixing the leading index
+	WindowSlice< Data , typename Pack::Rest > operator[]( int idx ){ return WindowSlice< Data , typename Pack::Rest >( data + WindowSize< typename Pack::Rest >::Size * idx ); }
+	// Returns the entry addressed by a full multi-index by peeling off one index at a time
+	inline data_reference_type operator()( const int idx[sizeof...(Ress)] ){ return (*this)[ idx[0] ]( idx+1 ); }
+	// Read-only access to the entry addressed by a full multi-index.
+	// operator[] is only available on non-const slices, so the look-up is routed through the read-only view of the same data.
+	const_data_reference_type operator()( const int idx[sizeof...(Ress)] ) const
+	{
+		ConstWindowSlice< Data , Pack > constSlice = *this;
+		return constSlice( idx );
+	}
+	// Conversion to the read-only view of the same data
+	operator ConstWindowSlice< Data , Pack >() const { return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )data ); }
+	// The entries the slice refers to
+	Pointer( Data ) data;
+};
+// Read/write view of a one-dimensional window of resolution Res
+template< class Data , unsigned int Res >
+struct WindowSlice< Data , UIntPack< Res > >
+{
+	using Pack = UIntPack< Res >;
+	using data_type = Data;
+	using data_reference_type = Data&;
+	using const_data_reference_type = const Data&;
+
+	// The number of entries in the window
+	static const unsigned int Size = Res;
+
+	// The entries the slice refers to
+	Pointer( Data ) data;
+
+	WindowSlice( Pointer( Data ) d ) : data(d) { ; }
+
+	// Returns the idx-th entry
+	inline data_reference_type operator[]( int idx )
+	{
+		return data[idx];
+	}
+	inline const_data_reference_type operator[]( int idx ) const
+	{
+		return data[idx];
+	}
+
+	// Returns the entry addressed by a one-element multi-index
+	data_reference_type operator()( const int idx[1] )
+	{
+		return (*this)[ idx[0] ];
+	}
+	const_data_reference_type operator()( const int idx[1] ) const
+	{
+		return (*this)[ idx[0] ];
+	}
+
+	// Conversion to the read-only view of the same data
+	operator ConstWindowSlice< Data , Pack >() const
+	{
+		return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )data );
+	}
+};
+
+// A multi-dimensional window of resolution ( Ress ... ) whose entries are stored inside the object itself
+template< class Data , unsigned int ... Ress >
+struct StaticWindow< Data , UIntPack< Ress ... > >
+{
+	typedef UIntPack< Ress ... > Pack;
+	// The total number of entries in the window.
+	// When compiling with GCC and DEBUG defined, the constant is only declared here and is defined after the class;
+	// presumably this works around missing-definition link errors in unoptimized builds -- confirm.
+#if defined( __GNUC__ ) && defined( DEBUG )
+#warning "you've got me gcc"
+	static const unsigned int Size;
+#else // !( __GNUC__ && DEBUG )
+	static const unsigned int Size = WindowSize< Pack >::Size;
+#endif // ( __GNUC__ && DEBUG )
+	typedef ConstWindowSlice< Data , Pack > const_window_slice_type;
+	typedef WindowSlice< Data , Pack > window_slice_type;
+	typedef Data data_type;
+	// Return the (writable / read-only) sub-slice obtained by fixing the leading index.
+	// GetPointer is declared elsewhere; it is handed the storage array together with the total number of entries.
+	WindowSlice< Data , typename Pack::Rest > operator[]( int idx ){ return WindowSlice< Data , typename Pack::Rest >( GetPointer( data , WindowSize< Pack >::Size ) + WindowSize< typename Pack::Rest >::Size * idx ); }
+	ConstWindowSlice< Data , typename Pack::Rest > operator[]( int idx ) const { return ConstWindowSlice< Data , typename Pack::Rest >( ( ConstPointer( Data ) )GetPointer( data , WindowSize< Pack >::Size ) + WindowSize< typename Pack::Rest >::Size * idx ); }
+	// Return a (writable / read-only) slice covering the entire window
+	WindowSlice< Data , Pack > operator()( void ){ return WindowSlice< Data , Pack >( GetPointer( data , WindowSize< Pack >::Size ) ); }
+	ConstWindowSlice< Data , Pack > operator()( void ) const { return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )GetPointer( data , WindowSize< Pack >::Size ) ); }
+	// Return the entry addressed by a full multi-index: the writable version goes through the full-window slice,
+	// the read-only versions index the storage directly using the linearized offset
+	Data& operator()( const int idx[sizeof...(Ress)] ){ return (*this)()( idx ); }
+	const Data& operator()( const unsigned int idx[sizeof...(Ress)] ) const { return data[ GetWindowIndex( UIntPack< Ress ... >() , idx ) ]; }
+	const Data& operator()( const          int idx[sizeof...(Ress)] ) const { return data[ GetWindowIndex( UIntPack< Ress ... >() , idx ) ]; }
+	// The in-object storage for the entries of the window
+	Data data[ WindowSize< Pack >::Size ];
+};
+// Out-of-class definition of Size, matching the declaration-only branch inside the class
+#if defined( __GNUC__ ) && defined( DEBUG )
+template< class Data , unsigned int ... Ress >
+const unsigned int StaticWindow< Data , UIntPack< Ress ... > >::Size = WindowSize< UIntPack< Ress ... > >::Size;
+#endif // ( __GNUC__ && DEBUG )
+// A one-dimensional window of resolution Res whose entries are stored inside the object itself
+template< class Data , unsigned int Res >
+struct StaticWindow< Data , UIntPack< Res > >
+{
+	typedef UIntPack< Res > Pack;
+	// The number of entries in the window.
+	// When compiling with GCC and DEBUG defined, the constant is only declared here and is defined after the class;
+	// presumably this works around missing-definition link errors in unoptimized builds -- confirm.
+#if defined( __GNUC__ ) && defined( DEBUG )
+#warning "you've got me gcc"
+	static const unsigned int Size;
+#else // !( __GNUC__ && DEBUG )
+	static const unsigned int Size = Res;
+#endif // ( __GNUC__ && DEBUG )
+	typedef Data data_type;
+	// Return the idx-th entry
+	Data& operator[]( int idx ){ return data[idx]; };
+	const Data& operator[]( int idx ) const { return data[idx]; };
+	// Return a (writable / read-only) slice covering the entire window.
+	// GetPointer is declared elsewhere; it is handed the storage array together with the number of entries.
+	WindowSlice< Data , Pack > operator()( void ){ return WindowSlice< Data , Pack >( GetPointer( data , WindowSize< Pack >::Size ) ); }
+	ConstWindowSlice< Data , Pack > operator()( void ) const { return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )GetPointer( data , WindowSize< Pack >::Size ) ); }
+	// Return the entry addressed by a one-element multi-index
+	Data& operator()( const int idx[1] ){ return (*this)()( idx ); }
+	const Data& operator()( const unsigned int idx[1] ) const { return data[ idx[0] ]; }
+	const Data& operator()( const          int idx[1] ) const { return data[ idx[0] ]; }
+	// The in-object storage for the entries of the window
+	Data data[ Res ];
+};
+// Out-of-class definition of Size, matching the declaration-only branch inside the class
+#if defined( __GNUC__ ) && defined( DEBUG )
+template< class Data , unsigned int Res >
+const unsigned int StaticWindow< Data , UIntPack< Res > >::Size = Res;
+#endif // ( __GNUC__ && DEBUG )
+
+// A multi-dimensional window of resolution ( Ress ... ) whose entries are allocated on construction and released on destruction.
+// NOTE(review): no copy constructor / assignment is declared, so copies share the same `data` handle;
+// presumably both copies would then release it -- confirm against NewPointer / DeletePointer before copying windows.
+template< class Data , unsigned int ... Ress >
+struct DynamicWindow< Data , UIntPack< Ress ... > >
+{
+	using Pack = UIntPack< Ress ... >;
+	using const_window_slice_type = ConstWindowSlice< Data , Pack >;
+	using window_slice_type = WindowSlice< Data , Pack >;
+	using data_type = Data;
+
+	// The total number of entries in the window
+	static const unsigned int Size = WindowSize< Pack >::Size;
+
+	// Return the (writable / read-only) sub-slice obtained by fixing the leading index
+	WindowSlice< Data , typename Pack::Rest > operator[]( int idx )
+	{
+		typedef typename Pack::Rest SubPack;
+		return WindowSlice< Data , SubPack >( data + WindowSize< SubPack >::Size * idx );
+	}
+	ConstWindowSlice< Data , typename Pack::Rest > operator[]( int idx ) const
+	{
+		typedef typename Pack::Rest SubPack;
+		return ConstWindowSlice< Data , SubPack >( ( ConstPointer( Data ) )( data + WindowSize< SubPack >::Size * idx ) );
+	}
+
+	// Return a (writable / read-only) slice covering the entire window
+	WindowSlice< Data , Pack > operator()( void )
+	{
+		return WindowSlice< Data , Pack >( data );
+	}
+	ConstWindowSlice< Data , Pack > operator()( void ) const
+	{
+		return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )data );
+	}
+
+	// Return the entry addressed by a full multi-index, via the full-window slice
+	Data& operator()( const int idx[sizeof...(Ress)+1] )
+	{
+		return (*this)()( idx );
+	}
+	const Data& operator()( const int idx[sizeof...(Ress)+1] ) const
+	{
+		return (*this)()( idx );
+	}
+
+	DynamicWindow( void )
+	{
+		data = NewPointer< Data >( WindowSize< Pack >::Size );
+	}
+	~DynamicWindow( void )
+	{
+		DeletePointer( data );
+	}
+
+	// The allocated storage for the entries of the window
+	Pointer( Data ) data;
+};
+// A one-dimensional window of resolution Res whose entries are allocated on construction and released on destruction.
+// NOTE(review): no copy constructor / assignment is declared, so copies share the same `data` handle;
+// presumably both copies would then release it -- confirm against NewPointer / DeletePointer before copying windows.
+template< class Data , unsigned int Res >
+struct DynamicWindow< Data , UIntPack< Res > >
+{
+	using Pack = UIntPack< Res >;
+	using data_type = Data;
+
+	// The number of entries in the window
+	static const unsigned int Size = Res;
+
+	// Return the idx-th entry
+	Data& operator[]( int idx )
+	{
+		return data[idx];
+	}
+	const Data& operator[]( int idx ) const
+	{
+		return data[idx];
+	}
+
+	// Return a (writable / read-only) slice covering the entire window
+	WindowSlice< Data , Pack > operator()( void )
+	{
+		return WindowSlice< Data , Pack >( data );
+	}
+	ConstWindowSlice< Data , Pack > operator()( void ) const
+	{
+		return ConstWindowSlice< Data , Pack >( ( ConstPointer( Data ) )data );
+	}
+
+	// Return the entry addressed by a one-element multi-index, via the full-window slice
+	Data& operator()( const int idx[1] )
+	{
+		return (*this)()( idx );
+	}
+	const Data& operator()( const int idx[1] ) const
+	{
+		return (*this)()( idx );
+	}
+
+	DynamicWindow( void )
+	{
+		data = NewPointer< Data >( Res );
+	}
+	~DynamicWindow( void )
+	{
+		DeletePointer( data );
+	}
+
+	// The allocated storage for the entries of the window
+	Pointer( Data ) data;
+};
+
+// Recursive loop iterations for processing window slices
+//		WindowDimension: the dimension of the window slice
+//		IterationDimensions: the number of dimensions to process
+//		Res: the resolution of the window
+
+// The implementation of the loop, defined in Window.inl
+template< unsigned int WindowDimension , unsigned int IterationDimensions , unsigned int CurrentIteration > struct _WindowLoop;
+
+// Public entry point for the window loops: every overload forwards its arguments, unchanged, to the
+// _WindowLoop implementation with the current iteration count set to IterationDimensions.
+// The loop bounds can be given as a single [begin,end) pair, as per-dimension arrays, or as compile-time packs.
+template< unsigned int WindowDimension , unsigned int IterationDimensions=WindowDimension >
+struct WindowLoop
+{
+	// Serial versions
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::Run( begin , end , updateState , function , w ... );
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::Run( begin , end , updateState , function , w ... );
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::Run( begin , end , updateState , function , w ... );
+	}
+
+	// Versions forwarding to the implementation's RunParallel
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::RunParallel( begin , end , updateState , function , w ... );
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::RunParallel( begin , end , updateState , function , w ... );
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction updateState , ProcessFunction function , Windows ... w )
+	{
+		typedef _WindowLoop< WindowDimension , IterationDimensions , IterationDimensions > Loop;
+		Loop::RunParallel( begin , end , updateState , function , w ... );
+	}
+};
+
+#include "Window.inl"
+
+#endif // WINDOW_INCLUDED
diff --git a/Src/Window.inl b/Src/Window.inl
new file mode 100644
index 0000000..4a7a511
--- /dev/null
+++ b/Src/Window.inl
@@ -0,0 +1,378 @@
+/*
+Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer. Redistributions in binary form must reproduce
+the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution. 
+
+Neither the name of the Johns Hopkins University nor the names of its contributors
+may be used to endorse or promote products derived from this software without specific
+prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+*/
+
+//////////////////////////////////////////
+// IterationDimension < WindowDimension //
+//////////////////////////////////////////
+template< unsigned int WindowDimension , unsigned int IterationDimensions , unsigned int CurrentIteration >
+struct _WindowLoop // One level of a nested loop: each member iterates i over this level's range, calls updateState, then recurses into CurrentIteration-1 with every window indexed as w[i]
+{
+protected: // Members are reachable only through the friends declared below
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // so WindowDimension - CurrentDimension (the index passed to updateState) equals IterationDimensions - CurrentIteration
+	friend struct WindowLoop< WindowDimension , IterationDimensions >; // front-end that starts the recursion
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >; // the enclosing level, which calls into this one
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; the same [begin,end) bounds are passed unchanged to the next level
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( begin , end , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; begin[0]/end[0] bound this level, begin+1/end+1 are handed to the next
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( begin+1 , end+1 , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; the packs' First values bound this level, their Rest types are handed to the next
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , updateState , function , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // This level's loop is an OpenMP parallel-for; inner levels go through RunThread with the index from omp_get_thread_num()
+	{
+#pragma omp parallel for
+		for( int i=begin ; i<end ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin , end , thread , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // As above, with this level bounded by begin[0]/end[0] and begin+1/end+1 handed to RunThread
+	{
+#pragma omp parallel for
+		for( int i=begin[0] ; i<end[0] ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin+1 , end+1 , thread , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // As above, with this level bounded by the packs' First values and their Rest types handed to RunThread
+	{
+#pragma omp parallel for
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , thread , updateState , function , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial loop that passes the caller-supplied thread index to updateState and on to the next level
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin , end , thread , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, with this level bounded by begin[0]/end[0]
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin+1 , end+1 , thread , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, with this level bounded by the packs' First values
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , thread , updateState , function , w[i] ... ); }
+	}
+};
+//////////////////////////////////////////
+// IterationDimension = WindowDimension //
+//////////////////////////////////////////
+template< unsigned int WindowDimension , unsigned int CurrentIteration >
+struct _WindowLoop< WindowDimension , WindowDimension , CurrentIteration > // Recursive level for the case where the iteration count equals WindowDimension; each member loops, calls updateState, and recurses into CurrentIteration-1 with w[i]
+{
+protected: // Members are reachable only through the friends declared below
+	static const int IterationDimensions = WindowDimension; // re-creates the name that is no longer a template parameter here
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // reduces to CurrentIteration in this specialization
+	friend struct WindowLoop< WindowDimension , IterationDimensions >; // front-end that starts the recursion
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >; // the enclosing level, which calls into this one
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; the same [begin,end) bounds are passed unchanged to the next level
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( begin , end , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; begin[0]/end[0] bound this level, begin+1/end+1 are handed to the next
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( begin+1 , end+1 , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial; the packs' First values bound this level, their Rest types are handed to the next
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::Run( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , updateState , function , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // This level's loop is an OpenMP parallel-for; inner levels go through RunThread with the index from omp_get_thread_num()
+	{
+#pragma omp parallel for
+		for( int i=begin ; i<end ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin , end , thread , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // As above, with this level bounded by begin[0]/end[0] and begin+1/end+1 handed to RunThread
+	{
+#pragma omp parallel for
+		for( int i=begin[0] ; i<end[0] ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin+1 , end+1 , thread , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // As above, with this level bounded by the packs' First values and their Rest types handed to RunThread
+	{
+#pragma omp parallel for
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , thread , updateState , function , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial loop that passes the caller-supplied thread index to updateState and on to the next level
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin , end , thread , updateState , function , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, with this level bounded by begin[0]/end[0]
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( begin+1 , end+1 , thread , updateState , function , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, with this level bounded by the packs' First values
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration-1 >::RunThread( typename UIntPack< Begin ... >::Rest() , typename UIntPack< End ... >::Rest() , thread , updateState , function , w[i] ... ); }
+	}
+};
+///////////////////////////////////////////////////////////////////
+// IterationDimension < WindowDimension and CurrentIteration = 1 //
+///////////////////////////////////////////////////////////////////
+template< unsigned int WindowDimension , unsigned int IterationDimensions >
+struct _WindowLoop< WindowDimension , IterationDimensions , 1 > // Innermost level: each member loops, calls updateState, then invokes function on w[i] ... instead of recursing
+{
+protected: // Members are reachable only through the friends declared below
+	static const unsigned int CurrentIteration = 1; // re-creates the name that is no longer a template parameter here
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // so WindowDimension - CurrentDimension (the index passed to updateState) equals IterationDimensions - 1
+	friend struct WindowLoop< WindowDimension , IterationDimensions >; // front-end that starts the recursion
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >; // the enclosing level, which calls into this one
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); calls function( w[i] ... )
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin[0],end[0]); calls function( w[i] ... )
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over the packs' First values; calls function( w[i] ... )
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin,end); calls function( thread , w[i] ... ) with thread from omp_get_thread_num()
+	{
+#pragma omp parallel for
+		for( int i=begin ; i<end ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin[0],end[0])
+	{
+#pragma omp parallel for
+		for( int i=begin[0] ; i<end[0] ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over the packs' First values
+	{
+#pragma omp parallel for
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); passes the caller-supplied thread index to updateState and function
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over [begin[0],end[0])
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over the packs' First values
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+};
+///////////////////////////////////////////////////////////////////
+// IterationDimension = WindowDimension and CurrentIteration = 1 //
+///////////////////////////////////////////////////////////////////
+template< unsigned int WindowDimension >
+struct _WindowLoop< WindowDimension , WindowDimension , 1 > // Innermost level when the iteration count equals WindowDimension; matches both the < W , W , C > and < W , I , 1 > partial specializations, so it resolves what would otherwise be an ambiguous choice between them
+{
+protected: // Members are reachable only through the friends declared below
+	static const unsigned int CurrentIteration = 1; // re-creates the name that is no longer a template parameter here
+	static const int IterationDimensions = WindowDimension; // likewise
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // reduces to 1 in this specialization
+	friend struct WindowLoop< WindowDimension , IterationDimensions >; // front-end that starts the recursion
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >; // the enclosing level, which calls into this one
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); calls function( w[i] ... )
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin[0],end[0]); calls function( w[i] ... )
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over the packs' First values; calls function( w[i] ... )
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin,end); calls function( thread , w[i] ... ) with thread from omp_get_thread_num()
+	{
+#pragma omp parallel for
+		for( int i=begin ; i<end ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin[0],end[0])
+	{
+#pragma omp parallel for
+		for( int i=begin[0] ; i<end[0] ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over the packs' First values
+	{
+#pragma omp parallel for
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); passes the caller-supplied thread index to updateState and function
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over [begin[0],end[0])
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over the packs' First values
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+};
+/////////////////////////////////////////////////////////////////
+// IterationDimension = WindowDimension = CurrentIteration = 1 //
+/////////////////////////////////////////////////////////////////
+template<  >
+struct _WindowLoop< 1 , 1 , 1 > // Full specialization for a single one-dimensional level. NOTE(review): member bodies match the < W , W , 1 > partial specialization, so this looks redundant -- confirm whether it is a compiler workaround
+{
+protected: // Members are reachable only through the friends declared below
+	static const unsigned int CurrentIteration = 1; // the template arguments are re-created as constants so the member bodies read the same as in the other specializations
+	static const int WindowDimension = 1;
+	static const int IterationDimensions = WindowDimension;
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // evaluates to 1, so updateState receives index 0
+	friend struct WindowLoop< WindowDimension , IterationDimensions >; // front-end that starts the recursion
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >;
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); calls function( w[i] ... )
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin[0],end[0]); calls function( w[i] ... )
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over the packs' First values; calls function( w[i] ... )
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( WindowDimension - CurrentDimension , i ) ; function( w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin,end); calls function( thread , w[i] ... ) with thread from omp_get_thread_num()
+	{
+#pragma omp parallel for
+		for( int i=begin ; i<end ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over [begin[0],end[0])
+	{
+#pragma omp parallel for
+		for( int i=begin[0] ; i<end[0] ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // OpenMP parallel-for over the packs' First values
+	{
+#pragma omp parallel for
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ int thread = omp_get_thread_num() ; updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Serial over [begin,end); passes the caller-supplied thread index to updateState and function
+	{
+		for( int i=begin ; i<end ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over [begin[0],end[0])
+	{
+		for( int i=begin[0] ; i<end[0] ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ) // Same, over the packs' First values
+	{
+		for( int i=UIntPack< Begin ... >::First ; i<UIntPack< End ... >::First ; i++ ){ updateState( thread , WindowDimension - CurrentDimension , i ) ; function( thread , w[i] ... ); }
+	}
+};
+//////////////////////////
+// CurrentIteration = 0 //
+//////////////////////////
+template< unsigned int WindowDimension , unsigned int IterationDimensions >
+struct _WindowLoop< WindowDimension , IterationDimensions , 0 > // Guard level: the CurrentIteration = 1 specializations call function directly rather than recursing, so every member here only reports an error through ERROR_OUT
+{
+protected: // Members are reachable only through the friends declared below
+	static const unsigned int CurrentIteration = 0; // re-creates the name that is no longer a template parameter here
+	static const int CurrentDimension = CurrentIteration + WindowDimension - IterationDimensions; // not read by any member of this specialization
+	friend struct WindowLoop< WindowDimension , IterationDimensions >;
+	friend struct _WindowLoop< WindowDimension , IterationDimensions , CurrentIteration+1 >;
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void Run( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( int begin , int end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( const int* begin , const int* end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunParallel( UIntPack< Begin ... > begin , UIntPack< End ... > end , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( int begin , int end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( const int* begin , const int* end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+	template< unsigned int ... Begin , unsigned int ... End , typename UpdateFunction , typename ProcessFunction , class ... Windows >
+	static void RunThread( UIntPack< Begin ... > begin , UIntPack< End ... > end , int thread , UpdateFunction& updateState , ProcessFunction& function , Windows ... w ){ ERROR_OUT( "Shouldn't be here" ); }
+};
diff --git a/SurfaceTrimmer.vcxproj b/SurfaceTrimmer.vcxproj
index 383f678..ede8197 100644
--- a/SurfaceTrimmer.vcxproj
+++ b/SurfaceTrimmer.vcxproj
@@ -1,5 +1,5 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
@@ -23,31 +23,30 @@
     <RootNamespace>MeshClipper</RootNamespace>
     <Keyword>Win32Proj</Keyword>
     <ProjectName>SurfaceTrimmer</ProjectName>
-    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>Unicode</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>Unicode</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>Unicode</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>Unicode</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -68,9 +67,9 @@
   <PropertyGroup>
     <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
     <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)Bin\$(Platform)\$(Configuration)\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Configuration)\</OutDir>
     <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)\Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
     <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)Bin\$(Platform)\$(Configuration)\</OutDir>
@@ -113,14 +112,13 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -136,7 +134,6 @@
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -165,19 +162,9 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp" />
     <ClCompile Include="Src\PlyFile.cpp" />
     <ClCompile Include="Src\SurfaceTrimmer.cpp" />
   </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\CmdLineParser.h" />
-    <ClInclude Include="Src\MyTime.h" />
-    <ClInclude Include="Src\Ply.h" />
-    <ClInclude Include="Src\PlyFile.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\CmdLineParser.inl" />
-  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/SurfaceTrimmer.vcxproj.filters b/SurfaceTrimmer.vcxproj.filters
deleted file mode 100644
index b6ea680..0000000
--- a/SurfaceTrimmer.vcxproj.filters
+++ /dev/null
@@ -1,47 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClCompile Include="Src\CmdLineParser.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\SurfaceTrimmer.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="Src\PlyFile.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="Src\CmdLineParser.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\Ply.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\PlyFile.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="Src\MyTime.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{237cb93d-2caa-4f6b-9bb3-5d597e5fd1dd}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{e857a52e-da0b-4eea-9e27-7b9c21e142c9}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Include Files">
-      <UniqueIdentifier>{61c77966-1174-4dae-9079-00d39bbe4aab}</UniqueIdentifier>
-      <Extensions>inc;inl</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Src\CmdLineParser.inl">
-      <Filter>Include Files</Filter>
-    </None>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/ZLIB.vcxproj b/ZLIB.vcxproj
new file mode 100644
index 0000000..2514f45
--- /dev/null
+++ b/ZLIB.vcxproj
@@ -0,0 +1,175 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="ZLIB\adler32.c" />
+    <ClCompile Include="ZLIB\compress.c" />
+    <ClCompile Include="ZLIB\crc32.c" />
+    <ClCompile Include="ZLIB\deflate.c" />
+    <ClCompile Include="ZLIB\gzio.c" />
+    <ClCompile Include="ZLIB\infblock.c" />
+    <ClCompile Include="ZLIB\infcodes.c" />
+    <ClCompile Include="ZLIB\inffast.c" />
+    <ClCompile Include="ZLIB\inflate.c" />
+    <ClCompile Include="ZLIB\inftrees.c" />
+    <ClCompile Include="ZLIB\infutil.c" />
+    <ClCompile Include="ZLIB\trees.c" />
+    <ClCompile Include="ZLIB\uncompr.c" />
+    <ClCompile Include="ZLIB\zutil.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="ZLIB\deflate.h" />
+    <ClInclude Include="ZLIB\infblock.h" />
+    <ClInclude Include="ZLIB\infcodes.h" />
+    <ClInclude Include="ZLIB\inffast.h" />
+    <ClInclude Include="ZLIB\inffixed.h" />
+    <ClInclude Include="ZLIB\inftrees.h" />
+    <ClInclude Include="ZLIB\infutil.h" />
+    <ClInclude Include="ZLIB\trees.h" />
+    <ClInclude Include="ZLIB\zconf.h" />
+    <ClInclude Include="ZLIB\zlib.h" />
+    <ClInclude Include="ZLIB\zutil.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{D3D173AB-D306-4179-BEC4-95CE1B14E647}</ProjectGuid>
+    <RootNamespace>ZLIB</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)Intermediate\$(ProjectName)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)Bin\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)Obj\$(TargetName)\$(Platform)\$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>TurnOffAllWarnings</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>.</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/ZLIB/Make_vms.com b/ZLIB/Make_vms.com
new file mode 100644
index 0000000..1c57e8f
--- /dev/null
+++ b/ZLIB/Make_vms.com
@@ -0,0 +1,115 @@
+$! make libz under VMS
+$! written by Martin P.J. Zinser <m.zinser@gsi.de>
+$!
+$! Look for the compiler used
+$!
+$ ccopt = ""
+$ if f$getsyi("HW_MODEL").ge.1024
+$ then
+$  ccopt = "/prefix=all"+ccopt
+$  comp  = "__decc__=1"
+$  if f$trnlnm("SYS").eqs."" then define sys sys$library:
+$ else
+$  if f$search("SYS$SYSTEM:DECC$COMPILER.EXE").eqs.""
+$   then
+$    comp  = "__vaxc__=1"
+$    if f$trnlnm("SYS").eqs."" then define sys sys$library:
+$   else
+$    if f$trnlnm("SYS").eqs."" then define sys decc$library_include:
+$    ccopt = "/decc/prefix=all"+ccopt
+$    comp  = "__decc__=1"
+$  endif
+$ endif
+$!
+$! Build the thing plain or with mms
+$!
+$ write sys$output "Compiling Zlib sources ..."
+$ if f$search("SYS$SYSTEM:MMS.EXE").eqs.""
+$  then
+$   dele example.obj;*,minigzip.obj;*
+$   CALL MAKE adler32.OBJ "CC ''CCOPT' adler32" -
+                adler32.c zlib.h zconf.h
+$   CALL MAKE compress.OBJ "CC ''CCOPT' compress" -
+                compress.c zlib.h zconf.h
+$   CALL MAKE crc32.OBJ "CC ''CCOPT' crc32" -
+                crc32.c zlib.h zconf.h
+$   CALL MAKE deflate.OBJ "CC ''CCOPT' deflate" -
+                deflate.c deflate.h zutil.h zlib.h zconf.h
+$   CALL MAKE gzio.OBJ "CC ''CCOPT' gzio" -
+                gzio.c zutil.h zlib.h zconf.h
+$   CALL MAKE infblock.OBJ "CC ''CCOPT' infblock" -
+                infblock.c zutil.h zlib.h zconf.h infblock.h
+$   CALL MAKE infcodes.OBJ "CC ''CCOPT' infcodes" -
+                infcodes.c zutil.h zlib.h zconf.h inftrees.h
+$   CALL MAKE inffast.OBJ "CC ''CCOPT' inffast" -
+                inffast.c zutil.h zlib.h zconf.h inffast.h
+$   CALL MAKE inflate.OBJ "CC ''CCOPT' inflate" -
+                inflate.c zutil.h zlib.h zconf.h infblock.h
+$   CALL MAKE inftrees.OBJ "CC ''CCOPT' inftrees" -
+                inftrees.c zutil.h zlib.h zconf.h inftrees.h
+$   CALL MAKE infutil.OBJ "CC ''CCOPT' infutil" -
+                infutil.c zutil.h zlib.h zconf.h inftrees.h infutil.h
+$   CALL MAKE trees.OBJ "CC ''CCOPT' trees" -
+                trees.c deflate.h zutil.h zlib.h zconf.h
+$   CALL MAKE uncompr.OBJ "CC ''CCOPT' uncompr" -
+                uncompr.c zlib.h zconf.h
+$   CALL MAKE zutil.OBJ "CC ''CCOPT' zutil" -
+                zutil.c zutil.h zlib.h zconf.h
+$   write sys$output "Building Zlib ..."
+$   CALL MAKE libz.OLB "lib/crea libz.olb *.obj" *.OBJ
+$   write sys$output "Building example..."
+$   CALL MAKE example.OBJ "CC ''CCOPT' example" -
+                example.c zlib.h zconf.h
+$   call make example.exe "LINK example,libz.olb/lib" example.obj libz.olb
+$   write sys$output "Building minigzip..."
+$   CALL MAKE minigzip.OBJ "CC ''CCOPT' minigzip" -
+                minigzip.c zlib.h zconf.h
+$   call make minigzip.exe - 
+                "LINK minigzip,libz.olb/lib,x11vms:xvmsutils.olb/lib" - 
+                minigzip.obj libz.olb
+$  else
+$   mms/macro=('comp')
+$  endif
+$ write sys$output "Zlib build completed"
+$ exit
+$!
+$!
+$MAKE: SUBROUTINE   ! Run P2 when P1 is missing or not newer than a dependency
+$ V = 'F$Verify(0)
+$! P1 = File we are trying to make (the target)
+$! P2 = Command to make it (echoed to sys$output, then executed)
+$! P3 - P8 = Space-separated lists of files it depends on (each may match several files)
+$
+$ If F$Search(P1) .Eqs. "" Then Goto Makeit
+$ Time = F$CvTime(F$File(P1,"RDT"))
+$arg=3
+$Loop:
+$       Argument = P'arg
+$       If Argument .Eqs. "" Then Goto Exit
+$       El=0
+$Loop2:
+$       File = F$Element(El," ",Argument)
+$       If File .Eqs. " " Then Goto Endl
+$       AFile = ""
+$Loop3:
+$       OFile = AFile
+$       AFile = F$Search(File)
+$       If AFile .Eqs. "" .Or. AFile .Eqs. OFile Then Goto NextEl
+$       If F$CvTime(F$File(AFile,"RDT")) .Ges. Time Then Goto Makeit
+$       Goto Loop3
+$NextEL:
+$       El = El + 1
+$       Goto Loop2
+$EndL:
+$ arg=arg+1
+$ If arg .Le. 8 Then Goto Loop
+$ Goto Exit
+$
+$Makeit:
+$ VV=F$VERIFY(0)
+$ write sys$output P2
+$ 'P2
+$ VV='F$Verify(VV)
+$Exit:
+$ If V Then Set Verify
+$ENDSUBROUTINE
diff --git a/ZLIB/Makefile b/ZLIB/Makefile
new file mode 100644
index 0000000..ab23a7c
--- /dev/null
+++ b/ZLIB/Makefile
@@ -0,0 +1,63 @@
+ZLIB_TARGET=libmyz.a
+ZLIB_SOURCE=adler32.c compress.c crc32.c deflate.c gzio.c infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c trees.c uncompr.c zutil.c
+
+COMPILER = gcc
+#COMPILER = clang
+
+CFLAGS += -Wno-deprecated -Wno-write-strings
+
+CFLAGS_DEBUG = -DDEBUG -g3
+LFLAGS_DEBUG =
+
+CFLAGS_RELEASE = -O3 -DRELEASE -funroll-loops -ffast-math -g
+LFLAGS_RELEASE = -O3 -g
+
+SRC = ./
+BIN = ../Bin/Linux/
+INCLUDE = ../
+
+ifeq ($(COMPILER),gcc)
+	CC=gcc
+	CXX=g++
+else
+	CC=clang-3.8
+	CXX=clang++-3.8
+#	CC=clang-3.5
+#	CXX=clang++-3.5
+endif
+
+MD=mkdir
+
+ZLIB_OBJECTS=$(addprefix $(BIN), $(addsuffix .o, $(basename $(ZLIB_SOURCE))))
+
+all: CFLAGS += $(CFLAGS_RELEASE)
+all: LFLAGS += $(LFLAGS_RELEASE)
+all: make_dir
+all: $(BIN)$(ZLIB_TARGET)
+
+debug: CFLAGS += $(CFLAGS_DEBUG)
+debug: LFLAGS += $(LFLAGS_DEBUG)
+debug: make_dir
+debug: $(BIN)$(ZLIB_TARGET)
+
+zlib: CFLAGS += $(CFLAGS_RELEASE)
+zlib: LFLAGS += $(LFLAGS_RELEASE)
+zlib: make_dir
+zlib: $(BIN)$(ZLIB_TARGET)
+
+clean:
+	rm -rf $(BIN)$(ZLIB_TARGET)
+	rm -rf $(ZLIB_OBJECTS)
+
+make_dir:
+	$(MD) -p $(BIN)
+
+$(BIN)$(ZLIB_TARGET): $(ZLIB_OBJECTS)
+	ar rcs $(BIN)$(ZLIB_TARGET) $(ZLIB_OBJECTS)
+
+$(BIN)%.o: $(SRC)%.c
+	$(CC) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
+
+$(BIN)%.o: $(SRC)%.cpp
+	$(CXX) -c -o $@ $(CFLAGS) -I$(INCLUDE) $<
+
diff --git a/ZLIB/adler32.c b/ZLIB/adler32.c
new file mode 100644
index 0000000..fdb0e6c
--- /dev/null
+++ b/ZLIB/adler32.c
@@ -0,0 +1,48 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: adler32.c,v 1.1 2014/03/04 21:20:43 uid42406 Exp $ */
+
+#include "zlib.h"
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8);
+
+/* Fold len bytes at buf into the running checksum adler; returns 1 when buf is Z_NULL. */
+uLong ZEXPORT adler32(adler, buf, len)
+    uLong adler;
+    const Bytef *buf;
+    uInt len;
+{
+    unsigned long s1 = adler & 0xffff;          /* low 16 bits: running sum of the bytes */
+    unsigned long s2 = (adler >> 16) & 0xffff;  /* high 16 bits: running sum of the s1 values */
+    int k;                                      /* bytes left in the current chunk */
+
+    if (buf == Z_NULL) return 1L;
+
+    while (len > 0) {
+        k = len < NMAX ? len : NMAX;  /* at most NMAX bytes between the modulo reductions below */
+        len -= k;
+        while (k >= 16) {             /* unrolled: 16 bytes per iteration */
+            DO16(buf);
+	    buf += 16;
+            k -= 16;
+        }
+        if (k != 0) do {              /* remaining 1..15 bytes of the chunk */
+            s1 += *buf++;
+	    s2 += s1;
+        } while (--k);
+        s1 %= BASE;
+        s2 %= BASE;
+    }
+    return (s2 << 16) | s1;           /* repack: s2 in the high half, s1 in the low half */
+}
diff --git a/ZLIB/compress.c b/ZLIB/compress.c
new file mode 100644
index 0000000..9371538
--- /dev/null
+++ b/ZLIB/compress.c
@@ -0,0 +1,68 @@
+/* compress.c -- compress a memory buffer
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: compress.c,v 1.1 2014/03/04 21:20:43 uid42406 Exp $ */
+
+#include "zlib.h"
+
+/* ===========================================================================
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least 0.1% larger than sourceLen plus
+   12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer (or
+   a buffer length does not fit in a uInt), Z_STREAM_ERROR if level is invalid.
+*/
+int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+    int level;
+{
+    z_stream stream;
+    int err;
+
+    stream.next_in = (Bytef*)source;
+    stream.avail_in = (uInt)sourceLen;
+    /* Reject a source length that does not fit in uInt on every platform,
+     * not only under MAXSEG_64K: where uLong is wider than uInt the cast
+     * above would otherwise drop the high bits and compress a prefix only. */
+    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
+    stream.next_out = dest;
+    stream.avail_out = (uInt)*destLen;
+    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
+
+    err = deflateInit(&stream, level);
+    if (err != Z_OK) return err;
+
+    err = deflate(&stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        deflateEnd(&stream);
+        return err == Z_OK ? Z_BUF_ERROR : err;
+    }
+    *destLen = stream.total_out;
+
+    err = deflateEnd(&stream);
+    return err;
+}
+
+/* ===========================================================================
+   Same as compress2, with level fixed to Z_DEFAULT_COMPRESSION. */
+int ZEXPORT compress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
+}
diff --git a/ZLIB/crc32.c b/ZLIB/crc32.c
new file mode 100644
index 0000000..9e43ab1
--- /dev/null
+++ b/ZLIB/crc32.c
@@ -0,0 +1,162 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: crc32.c,v 1.1 2014/03/04 21:20:43 uid42406 Exp $ */
+
+#include "zlib.h"
+
+#define local static
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local int crc_table_empty = 1;
+local uLongf crc_table[256];
+local void make_crc_table OF((void));
+
+/*
+  Generate a table for a byte-wise 32-bit CRC calculation on the polynomial:
+  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+  Polynomials over GF(2) are represented in binary, one bit per coefficient,
+  with the lowest powers in the most significant bit.  Then adding polynomials
+  is just exclusive-or, and multiplying a polynomial by x is a right shift by
+  one.  If we call the above polynomial p, and represent a byte as the
+  polynomial q, also with the lowest power in the most significant bit (so the
+  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+  where a mod b means the remainder after dividing a by b.
+
+  This calculation is done using the shift-register method of multiplying and
+  taking the remainder.  The register is initialized to zero, and for each
+  incoming bit, x^32 is added mod p to the register if the bit is a one (where
+  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+  x (which is shifting right by one and adding x^32 mod p if the bit shifted
+  out is a one).  We start with the highest power (least significant bit) of
+  q and repeat for all eight bits of q.
+
+  The table is simply the CRC of all possible eight bit values.  This is all
+  the information needed to generate CRC's on data a byte at a time for all
+  combinations of CRC register values and incoming bytes.
+*/
+local void make_crc_table()
+{
+  uLong c;
+  int n, k;
+  uLong poly;            /* polynomial exclusive-or pattern */
+  /* terms of polynomial defining this crc (except x^32): */
+  static const Byte p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+  /* make exclusive-or pattern from polynomial (0xedb88320L) */
+  poly = 0UL;
+  for (n = 0; n < (int)(sizeof(p)/sizeof(Byte)); n++)
+    poly |= 1UL << (31 - p[n]);  /* unsigned: 1L << 31 overflows a 32-bit long */
+ 
+  for (n = 0; n < 256; n++)      /* one table entry per possible byte value */
+  {
+    c = (uLong)n;
+    for (k = 0; k < 8; k++)      /* shift out eight bits, xor-ing poly on each 1 bit */
+      c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+    crc_table[n] = c;
+  }
+  crc_table_empty = 0;           /* mark the table as built */
+}
+#else
+/* ========================================================================
+ * Table of CRC-32's of all single-byte values (made by make_crc_table)
+ */
+local const uLongf crc_table[256] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+#endif
+
+/* =========================================================================
+ * Return the 256-entry CRC table (built on first use under DYNAMIC_CRC_TABLE).
+ * This function can be used by asm versions of crc32() */
+const uLongf * ZEXPORT get_crc_table()
+{
+#ifdef DYNAMIC_CRC_TABLE
+  if (crc_table_empty) make_crc_table();
+#endif
+  return (const uLongf *)crc_table;
+}
+
+/* ========================================================================= */
+#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
+#define DO2(buf)  DO1(buf); DO1(buf);
+#define DO4(buf)  DO2(buf); DO2(buf);
+#define DO8(buf)  DO4(buf); DO4(buf);
+
+/* Update the running CRC crc with len bytes at buf; returns 0 when buf is Z_NULL. */
+uLong ZEXPORT crc32(crc, buf, len)
+    uLong crc;
+    const Bytef *buf;
+    uInt len;
+{
+    if (buf == Z_NULL) return 0L;
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+      make_crc_table();
+#endif
+    crc = crc ^ 0xffffffffL;   /* work on the complemented value */
+    while (len >= 8)           /* unrolled: eight bytes per iteration */
+    {
+      DO8(buf);
+      len -= 8;
+    }
+    if (len) do {              /* remaining 1..7 bytes */
+      DO1(buf);
+    } while (--len);
+    return crc ^ 0xffffffffL;  /* complement again before returning */
+}
diff --git a/ZLIB/deflate.c b/ZLIB/deflate.c
new file mode 100644
index 0000000..ece5764
--- /dev/null
+++ b/ZLIB/deflate.c
@@ -0,0 +1,1350 @@
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process depends on being able to identify portions
+ *      of the input text which are identical to earlier input (within a
+ *      sliding window trailing behind the input currently being processed).
+ *
+ *      The most straightforward technique turns out to be the fastest for
+ *      most input files: try all possible matches and select the longest.
+ *      The key feature of this algorithm is that insertions into the string
+ *      dictionary are very simple and thus fast, and deletions are avoided
+ *      completely. Insertions are performed at each input character, whereas
+ *      string matches are performed only when the previous match ends. So it
+ *      is preferable to spend more time in matches to allow very fast string
+ *      insertions and avoid deletions. The matching algorithm for small
+ *      strings is inspired from that of Rabin & Karp. A brute force approach
+ *      is used to find longer strings when a small match has been found.
+ *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ *      (by Leonid Broukhis).
+ *         A previous version of this file used a more sophisticated algorithm
+ *      (by Fiala and Greene) which is guaranteed to run in linear amortized
+ *      time, but has a larger average cost, uses more memory and is patented.
+ *      However the F&G algorithm may be faster for some highly redundant
+ *      files if the parameter max_chain_length (described below) is too large.
+ *
+ *  ACKNOWLEDGEMENTS
+ *
+ *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ *      I found it in 'freeze' written by Leonid Broukhis.
+ *      Thanks to many people for bug reports and testing.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ *      Available in ftp://ds.internic.net/rfc/rfc1951.txt
+ *
+ *      A description of the Rabin and Karp algorithm is given in the book
+ *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ *      Fiala,E.R., and Greene,D.H.
+ *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
+ *
+ */
+
+/* @(#) $Id: deflate.c,v 1.1 2014/03/04 21:20:43 uid42406 Exp $ */
+
+#include "deflate.h"
+
+const char deflate_copyright[] =
+   " deflate 1.1.4 Copyright 1995-2002 Jean-loup Gailly ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ *  Function prototypes.
+ */
+typedef enum {
+    need_more,      /* block not completed, need more input or more output */
+    block_done,     /* block flush performed */
+    finish_started, /* finish started, need only more output at next deflate */
+    finish_done     /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+/* Compression function. Returns the block state after the call. */
+
+local void fill_window    OF((deflate_state *s));
+local block_state deflate_stored OF((deflate_state *s, int flush));
+local block_state deflate_fast   OF((deflate_state *s, int flush));
+local block_state deflate_slow   OF((deflate_state *s, int flush));
+local void lm_init        OF((deflate_state *s));
+local void putShortMSB    OF((deflate_state *s, uInt b));
+local void flush_pending  OF((z_streamp strm));
+local int read_buf        OF((z_streamp strm, Bytef *buf, unsigned size));
+#ifdef ASMV
+      void match_init OF((void)); /* asm code initialization */
+      uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#else
+local uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#endif
+
+#ifdef DEBUG
+local  void check_match OF((deflate_state *s, IPos start, IPos match,
+                            int length));
+#endif
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+#  define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+   ush good_length; /* reduce lazy search above this match length */
+   ush max_lazy;    /* do not perform lazy search above this match length */
+   ush nice_length; /* quit search above this match length */
+   ush max_chain;   /* copied into max_chain_length: hash chain search limit */
+   compress_func func; /* block compression routine used at this level */
+} config;
+
+local const config configuration_table[10] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}, /* maximum speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, deflate_fast},
+/* 3 */ {4,    6, 32,   32, deflate_fast},
+
+/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
+/* 5 */ {8,   16, 32,   32, deflate_slow},
+/* 6 */ {8,   16, 128, 128, deflate_slow},
+/* 7 */ {8,   32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive
+ *    input characters, so that a running hash key can be computed from the
+ *    previous key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * If this file is compiled with -DFASTEST, the compression level is forced
+ * to 1, and no hash chains are maintained.
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive
+ *    input characters and the first MIN_MATCH bytes of str are valid
+ *    (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#ifdef FASTEST
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#else
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#endif
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+    s->head[s->hash_size-1] = NIL; \
+    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ========================================================================= */
+int ZEXPORT deflateInit_(strm, level, version, stream_size)
+    z_streamp strm;
+    int level;
+    const char *version;
+    int stream_size;
+{   /* deflateInit2_ with default method, window bits, memLevel and strategy */
+    int rc = deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+                           Z_DEFAULT_STRATEGY, version, stream_size);
+    return rc; /* To do: ignore strm->next_in if we use it as window */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
+                  version, stream_size)
+    z_streamp strm;            /* stream to initialize; must not be Z_NULL */
+    int  level;                /* 0..9, or Z_DEFAULT_COMPRESSION (mapped to 6) */
+    int  method;               /* must be Z_DEFLATED */
+    int  windowBits;           /* 9..15; a negative value suppresses the zlib header */
+    int  memLevel;             /* 1..MAX_MEM_LEVEL */
+    int  strategy;             /* 0..Z_HUFFMAN_ONLY */
+    const char *version;       /* caller's version string; first char must match ours */
+    int stream_size;           /* caller's sizeof(z_stream) */
+{
+    deflate_state *s;
+    int noheader = 0;
+    static const char* my_version = ZLIB_VERSION;
+    ushf *overlay;
+    /* We overlay pending_buf and d_buf+l_buf. This works since the average
+     * output size for (length,distance) codes is <= 24 bits. */
+    /* Returns Z_VERSION_ERROR, Z_STREAM_ERROR, Z_MEM_ERROR or deflateReset()'s result. */
+    if (version == Z_NULL || version[0] != my_version[0] ||
+        stream_size != sizeof(z_stream)) {
+        return Z_VERSION_ERROR;
+    }
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+    strm->msg = Z_NULL;
+    if (strm->zalloc == Z_NULL) {
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == Z_NULL) strm->zfree = zcfree;
+
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#ifdef FASTEST
+    level = 1;
+#endif
+
+    if (windowBits < 0) { /* undocumented feature: suppress zlib header */
+        noheader = 1;
+        windowBits = -windowBits;
+    }
+    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+        windowBits < 9 || windowBits > 15 || level < 0 || level > 9 ||
+        strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
+        return Z_STREAM_ERROR;
+    }
+    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+    if (s == Z_NULL) return Z_MEM_ERROR;
+    strm->state = (struct internal_state FAR *)s;
+    s->strm = strm;
+
+    s->noheader = noheader;
+    s->w_bits = windowBits;
+    s->w_size = 1 << s->w_bits;
+    s->w_mask = s->w_size - 1;
+
+    s->hash_bits = memLevel + 7;
+    s->hash_size = 1 << s->hash_bits;
+    s->hash_mask = s->hash_size - 1;
+    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+
+    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
+    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
+
+    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
+    s->pending_buf = (uchf *) overlay;
+    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+        s->pending_buf == Z_NULL) {
+        /* deflateEnd() frees nothing unless status holds a known state, and
+         * status has not been set yet: set it so the partial state is freed. */
+        s->status = FINISH_STATE;
+        strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
+        deflateEnd (strm);
+        return Z_MEM_ERROR;
+    }
+    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
+    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+    s->level = level;
+    s->strategy = strategy;
+    s->method = (Byte)method;
+
+    return deflateReset(strm);
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt  dictLength;
+{
+    deflate_state *s;
+    uInt used = dictLength;   /* dictionary bytes actually loaded */
+    uInt pos;                 /* window index being hashed */
+    IPos prev_head = 0;       /* written by INSERT_STRING; value not needed */
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (dictionary == Z_NULL) return Z_STREAM_ERROR;
+    if (strm->state->status != INIT_STATE) return Z_STREAM_ERROR;
+    s = strm->state;
+
+    /* The checksum covers the whole dictionary, even if only part is kept. */
+    strm->adler = adler32(strm->adler, dictionary, dictLength);
+    if (used < MIN_MATCH) return Z_OK;
+
+    if (used > MAX_DIST(s)) {
+        used = MAX_DIST(s);
+#ifndef USE_DICT_HEAD
+        dictionary += dictLength - used; /* use the tail of the dictionary */
+#endif
+    }
+    zmemcpy(s->window, dictionary, used);
+    s->strstart = used;
+    s->block_start = (long)used;
+
+    /* Hash every string except those starting in the last two bytes.
+     * s->lookahead stays null, so s->ins_h will be recomputed at the next
+     * call of fill_window.
+     */
+    s->ins_h = s->window[0];
+    UPDATE_HASH(s, s->ins_h, s->window[1]);
+    for (pos = 0; pos + MIN_MATCH <= used; pos++) INSERT_STRING(s, pos, prev_head);
+    (void)prev_head;  /* to make compiler happy */
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateReset (strm)
+    z_streamp strm;
+{
+    deflate_state *state;
+
+    /* A usable stream needs a state object and both allocator hooks. */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR;
+    state = (deflate_state *)strm->state;
+
+    strm->total_in = 0;
+    strm->total_out = 0;
+    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+    strm->data_type = Z_UNKNOWN;
+    strm->adler = 1;
+
+    state->pending = 0;
+    state->pending_out = state->pending_buf;
+    state->last_flush = Z_NO_FLUSH;
+
+    /* noheader was set to -1 by deflate(..., Z_FINISH); undo that first. */
+    if (state->noheader < 0) state->noheader = 0;
+    state->status = state->noheader ? BUSY_STATE : INIT_STATE;
+
+    _tr_init(state);
+    lm_init(state);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateParams(strm, level, strategy)
+    z_streamp strm;
+    int level;
+    int strategy;
+{
+    deflate_state *s;
+    const config *wanted;       /* table row for the requested level */
+    int result = Z_OK;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    s = strm->state;
+
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+    if (level < 0 || level > 9) return Z_STREAM_ERROR;
+    if (strategy < 0 || strategy > Z_HUFFMAN_ONLY) return Z_STREAM_ERROR;
+    wanted = &configuration_table[level];
+
+    /* The compression routine changes after input has already been
+     * consumed: flush the last buffer before switching.
+     */
+    if (strm->total_in != 0 &&
+        configuration_table[s->level].func != wanted->func) {
+        result = deflate(strm, Z_PARTIAL_FLUSH);
+    }
+    if (s->level != level) {
+        s->level = level;
+        s->max_lazy_match   = wanted->max_lazy;
+        s->good_match       = wanted->good_length;
+        s->nice_match       = wanted->nice_length;
+        s->max_chain_length = wanted->max_chain;
+    }
+    s->strategy = strategy;
+    return result;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+    deflate_state *s;
+    uInt b;
+{
+    Byte msb = (Byte)(b >> 8), lsb = (Byte)(b & 0xff);
+    put_byte(s, msb); put_byte(s, lsb);   /* most significant byte first */
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ */
+local void flush_pending(strm)
+    z_streamp strm;
+{
+    deflate_state *state = strm->state;
+    unsigned count = state->pending;     /* bytes to hand over in this call */
+
+    if (count > strm->avail_out) count = strm->avail_out;
+    if (count != 0) {
+        zmemcpy(strm->next_out, state->pending_out, count);
+        strm->next_out   += count;
+        strm->avail_out  -= count;
+        strm->total_out  += count;
+        state->pending_out += count;
+        state->pending     -= count;
+        /* Buffer drained: rewind the read pointer to its start. */
+        if (state->pending == 0) state->pending_out = state->pending_buf;
+    }
+}
+
+/* ========================================================================= */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm; /* stream with an initialized deflate state */
+    int flush;      /* flush mode, 0 .. Z_FINISH */
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+
+    if (strm == Z_NULL || strm->state == Z_NULL ||
+	flush > Z_FINISH || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+	(s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    s->strm = strm; /* just in case */
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Write the zlib header */
+    if (s->status == INIT_STATE) {
+
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags = (s->level-1) >> 1; /* NOTE(review): level 0 shifts -1 here; relies on the clamp below */
+
+        if (level_flags > 3) level_flags = 3;
+        header |= (level_flags << 6);
+	if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31); /* round the 16-bit header up to a multiple of 31 */
+
+        s->status = BUSY_STATE;
+        putShortMSB(s, header);
+
+	/* Save the adler32 of the preset dictionary: */
+	if (s->strstart != 0) {
+	    putShortMSB(s, (uInt)(strm->adler >> 16));
+	    putShortMSB(s, (uInt)(strm->adler & 0xffff));
+	}
+	strm->adler = 1L;
+    }
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+	    /* Since avail_out is 0, deflate will be called again with
+	     * more output space, but possibly with both pending and
+	     * avail_in equal to zero. There won't be anything to do,
+	     * but this is not an error situation so make sure we
+	     * return OK instead of BUF_ERROR at next call of deflate:
+             */
+	    s->last_flush = -1;
+	    return Z_OK;
+	}
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && flush <= old_flush &&
+	       flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+	bstate = (*(configuration_table[s->level].func))(s, flush); /* routine chosen by level */
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+	    if (strm->avail_out == 0) {
+	        s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+	    }
+	    return Z_OK;
+	    /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+	     * of deflate should use the same flush parameter to make sure
+	     * that the flush is complete. So we don't have to output an
+	     * empty block here, this will be done at next call. This also
+	     * ensures that for a very small output buffer, we emit at most
+	     * one empty block.
+	     */
+	}
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                }
+            }
+            flush_pending(strm);
+	    if (strm->avail_out == 0) {
+	      s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+	      return Z_OK;
+	    }
+        }
+    }
+    Assert(strm->avail_out > 0, "bug2");
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->noheader) return Z_STREAM_END; /* no header: no trailer either (or already written) */
+
+    /* Write the zlib trailer (adler32) */
+    putShortMSB(s, (uInt)(strm->adler >> 16));
+    putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    s->noheader = -1; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END; /* Z_OK while trailer bytes are still pending */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateEnd (strm)
+    z_streamp strm;
+{
+    deflate_state *s;
+    int status;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    s = strm->state;
+    status = s->status;
+
+    /* Only a state in one of the three known phases is torn down. */
+    if (!(status == INIT_STATE || status == BUSY_STATE ||
+          status == FINISH_STATE)) return Z_STREAM_ERROR;
+
+    /* Deallocate in reverse order of allocations: */
+    TRY_FREE(strm, s->pending_buf);
+    TRY_FREE(strm, s->head);
+    TRY_FREE(strm, s->prev);
+    TRY_FREE(strm, s->window);
+    ZFREE(strm, s);
+    strm->state = Z_NULL;
+    if (status == BUSY_STATE) return Z_DATA_ERROR;   /* stream was still busy */
+    return Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;    /* receives a copy of *source with its own state */
+    z_streamp source;  /* stream to duplicate; its state must be non-null */
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;
+#else
+    deflate_state *ds;
+    deflate_state *ss;
+    ushf *overlay;
+    if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+    ss = source->state;
+    *dest = *source;
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    if (ds == Z_NULL) {
+        /* *dest = *source left dest->state aliasing the source state; detach
+         * it so a later deflateEnd(dest) cannot free the source's buffers. */
+        dest->state = Z_NULL;
+        return Z_MEM_ERROR;
+    }
+    dest->state = (struct internal_state FAR *) ds;
+    *ds = *ss;
+    ds->strm = dest;
+
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);
+        return Z_MEM_ERROR;
+    }
+    /* following zmemcpy do not work for 16-bit MSDOS */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local int read_buf(strm, buf, size)
+    z_streamp strm;
+    Bytef *buf;
+    unsigned size;
+{
+    /* Take whatever is available, but never more than the caller asked for. */
+    unsigned take = strm->avail_in < size ? strm->avail_in : size;
+
+    if (take != 0) {
+        /* The running adler32 is only maintained when noheader is clear. */
+        if (strm->state->noheader == 0) {
+            strm->adler = adler32(strm->adler, strm->next_in, take);
+        }
+        zmemcpy(buf, strm->next_in, take);
+        strm->next_in  += take;
+        strm->avail_in -= take;
+        strm->total_in += take;
+    }
+
+    return (int)take;
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init (s)
+    deflate_state *s;
+{
+    const config *cfg = &configuration_table[s->level];
+    s->window_size = (ulg)2L*s->w_size;
+    CLEAR_HASH(s);
+
+    /* Tuning parameters come from the table row of the current level. */
+    s->max_lazy_match   = cfg->max_lazy;
+    s->good_match       = cfg->good_length;
+    s->nice_match       = cfg->nice_length;
+    s->max_chain_length = cfg->max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->match_available = 0;
+    s->prev_length = MIN_MATCH-1;
+    s->match_length = MIN_MATCH-1;
+    s->ins_h = 0;
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+}
+
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+#ifndef FASTEST
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = s->prev_length;              /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;   /* chain links, read as prev[cur_match & wmask] below */
+    uInt wmask = s->w_mask; /* mask applied to positions before indexing prev[] */
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2;
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2:
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1); /* rewind scan to the start of the current string */
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH; /* rewind scan to the start of the current string */
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead; /* never report more than the available lookahead */
+}
+
+#else /* FASTEST */
+/* ---------------------------------------------------------------------------
+ * Optimized version for level == 1 only
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The check at best_len-1 can be removed because it will be made
+     * again later. (This heuristic is not always a win.)
+     * It is not necessary to compare scan[2] and match[2] since they
+     * are always equal when the other bytes match, given that
+     * the hash keys are equal and that HASH_BITS >= 8.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+	     *++scan == *++match && *++scan == *++match &&
+	     *++scan == *++match && *++scan == *++match &&
+	     *++scan == *++match && *++scan == *++match &&
+	     scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1; /* shorter than MIN_MATCH: report failure */
+
+    s->match_start = cur_match;
+    return len <= s->lookahead ? len : s->lookahead; /* clamp to the available lookahead */
+}
+#endif /* FASTEST */
+#endif /* ASMV */
+
+#ifdef DEBUG
+/* ===========================================================================
+ * Check that the length bytes at window[match] equal those at window[start];
+ * on mismatch dump both strings to stderr and call z_error(). */
+local void check_match(s, start, match, length)
+    deflate_state *s;
+    IPos start, match;
+    int length;
+{
+    /* check that the match is indeed a match */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+		start, match, length);
+        do {
+	    fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+	} while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) { /* trace; the unsigned distance is cast to match %d */
+        fprintf(stderr,"\\[%d,%d]", (int)(start-match), length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length)
+#endif
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(s)
+    deflate_state *s;
+{
+    register unsigned n, m; /* n: countdown or bytes read; m: table entry */
+    register Posf *p;       /* cursor walking head[] / prev[] downwards */
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size; /* LZ77 window size; also the slide distance */
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+            more = wsize;
+
+        } else if (more == (unsigned)(-1)) {
+            /* Very unlikely, but possible on 16 bit machine if strstart == 0
+             * and lookahead == 1 (input done one byte at time)
+             */
+            more--;
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        } else if (s->strstart >= wsize+MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+
+            /* Slide the hash table (could be avoided with 32 bit values
+               at the expense of memory usage). We slide even when level == 0
+               to keep the hash table consistent if we switch back to level > 0
+               later. (Using level 0 permanently is not an optimal usage of
+               zlib, so we don't care about this pathological case.)
+             */
+	    n = s->hash_size;
+	    p = &s->head[n];
+	    do {
+		m = *--p;
+		*p = (Pos)(m >= wsize ? m-wsize : NIL); /* below wsize: now NIL */
+	    } while (--n);
+
+	    n = wsize;
+#ifndef FASTEST
+	    p = &s->prev[n];
+	    do {
+		m = *--p;
+		*p = (Pos)(m >= wsize ? m-wsize : NIL);
+		/* If n is not on any hash chain, prev[n] is garbage but
+		 * its value will never be used.
+		 */
+	    } while (--n);
+#endif
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) return; /* no input available to read */
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead >= MIN_MATCH) {
+            s->ins_h = s->window[s->strstart];
+            UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3 /* the next line is not valid C: it forces a build error */
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag, then start the next
+ * block at strstart and call flush_pending() on the stream.
+ * IN assertion: strstart is set to the end of the current match. */
+#define FLUSH_BLOCK_ONLY(s, eof) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+		(ulg)((long)s->strstart - s->block_start), \
+		(eof)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same, but make the calling function return once avail_out reaches 0. */
+#define FLUSH_BLOCK(s, eof) { \
+   FLUSH_BLOCK_ONLY(s, eof); \
+   if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+}
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ * This function does not insert new strings in the dictionary since
+ * uncompressible data is probably not useful. This function is used
+ * only for the level=0 compression option.
+ * NOTE: this function should be optimized to avoid extra copying from
+ * window to pending_buf.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
+     * to pending_buf_size, and each stored block has a 5 byte header:
+     */
+    ulg max_block_size = 0xffff;
+    ulg max_start; /* window index where the current block hits its limit */
+
+    if (max_block_size > s->pending_buf_size - 5) {
+        max_block_size = s->pending_buf_size - 5;
+    }
+
+    /* Copy as much as possible from input to output: */
+    for (;;) {
+        /* Fill the window as much as possible: */
+        if (s->lookahead <= 1) {
+
+            Assert(s->strstart < s->w_size+MAX_DIST(s) ||
+		   s->block_start >= (long)s->w_size, "slide too late");
+
+            fill_window(s);
+            if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+	Assert(s->block_start >= 0L, "block gone");
+
+	s->strstart += s->lookahead; /* take all buffered input into the block */
+	s->lookahead = 0;
+
+	/* Emit a stored block if pending_buf will be full: */
+ 	max_start = s->block_start + max_block_size;
+        if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
+	    /* strstart == 0 is possible when wraparound on 16-bit machine */
+	    s->lookahead = (uInt)(s->strstart - max_start); /* excess bytes */
+	    s->strstart = (uInt)max_start;
+            FLUSH_BLOCK(s, 0);
+	}
+	/* Flush if we may have to slide, otherwise block_start may become
+         * negative and the data will be gone:
+         */
+        if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
+            FLUSH_BLOCK(s, 0);
+	}
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH);
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state (need_more, block_done, finish_started or finish_done).
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL; /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+	        return need_more;
+	    }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the dictionary
+         * and set hash_head to its chain head, or NIL when nothing is inserted: */
+        hash_head = NIL; /* never search with a stale head from the last pass */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we have always match_length < MIN_MATCH
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            if (s->strategy != Z_HUFFMAN_ONLY) {
+                s->match_length = longest_match (s, hash_head);
+            }
+            /* longest_match() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in hash table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;
+            } else
+#endif
+	    {
+                s->strstart += s->match_length;
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH);
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local block_state deflate_slow(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL;    /* head of hash chain */
+    int bflush;              /* set if current block must be flushed */
+
+    /* Process the input block. */
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+	        return need_more;
+	    }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the dictionary
+         * and set hash_head to its chain head, or NIL when nothing is inserted: */
+        hash_head = NIL; /* never search with a stale head from the last pass */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         */
+        s->prev_length = s->match_length, s->prev_match = s->match_start;
+        s->match_length = MIN_MATCH-1;
+
+        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+            s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            if (s->strategy != Z_HUFFMAN_ONLY) {
+                s->match_length = longest_match (s, hash_head);
+            }
+            /* longest_match() sets match_start */
+
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
+                 (s->match_length == MIN_MATCH &&
+                  s->strstart - s->match_start > TOO_FAR))) {
+
+                /* If prev_match is also MIN_MATCH, match_start is garbage
+                 * but we will ignore the current match anyway.
+                 */
+                s->match_length = MIN_MATCH-1;
+            }
+        }
+        /* If there was a match at the previous step and the current
+         * match is not better, output the previous match:
+         */
+        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+            /* Do not insert strings in hash table beyond this. */
+
+            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+			   s->prev_length - MIN_MATCH, bflush);
+
+            /* Insert in hash table all strings up to the end of the match.
+             * strstart-1 and strstart are already inserted. If there is not
+             * enough lookahead, the last two strings are not inserted in
+             * the hash table.
+             */
+            s->lookahead -= s->prev_length-1;
+            s->prev_length -= 2;
+            do {
+                if (++s->strstart <= max_insert) {
+                    INSERT_STRING(s, s->strstart, hash_head);
+                }
+            } while (--s->prev_length != 0);
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart++;
+
+            if (bflush) FLUSH_BLOCK(s, 0);
+
+        } else if (s->match_available) {
+            /* If there was no match at the previous position, output a
+             * single literal. If there was a match but the current match
+             * is longer, truncate the previous match to a single literal.
+             */
+            Tracevv((stderr,"%c", s->window[s->strstart-1]));
+	    _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+	    if (bflush) {
+                FLUSH_BLOCK_ONLY(s, 0);
+            }
+            s->strstart++;
+            s->lookahead--;
+            if (s->strm->avail_out == 0) return need_more;
+        } else {
+            /* There is no previous match to compare with, wait for
+             * the next step to decide.
+             */
+            s->match_available = 1;
+            s->strstart++;
+            s->lookahead--;
+        }
+    }
+    Assert (flush != Z_NO_FLUSH, "no flush?");
+    if (s->match_available) {
+        Tracevv((stderr,"%c", s->window[s->strstart-1]));
+        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        s->match_available = 0;
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH);
+    return flush == Z_FINISH ? finish_done : block_done;
+}
diff --git a/ZLIB/deflate.h b/ZLIB/deflate.h
new file mode 100644
index 0000000..1a77c5f
--- /dev/null
+++ b/ZLIB/deflate.h
@@ -0,0 +1,318 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-2002 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id: deflate.h,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#ifndef _DEFLATE_H
+#define _DEFLATE_H
+
+#include "zutil.h"
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS  256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES   30
+/* number of distance codes */
+
+#define BL_CODES  19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
+
+#define INIT_STATE    42
+#define BUSY_STATE   113
+#define FINISH_STATE 666
+/* Stream status */
+
+
+/* One tree entry: a value and its code string; each union overlays two uses. */
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;                /* accessed through the Freq / Code macros */
+    union {
+        ush  dad;        /* father node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;                /* accessed through the Dad / Len macros */
+} FAR ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad  dl.dad
+#define Len  dl.len
+
+typedef struct static_tree_desc_s  static_tree_desc; /* not defined in this header */
+
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non zero frequency */
+    static_tree_desc *stat_desc; /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* as the name implies */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    int   pending;       /* number of bytes in the pending buffer */
+    int   noheader;      /* suppress zlib header and adler32 */
+    Byte  data_type;     /* UNKNOWN, BINARY or ASCII */
+    Byte  method;        /* STORED (for zip only) or DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least wSize
+     * bytes. With this organization, matches are limited to a distance of
+     * wSize-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
+#   define max_insert_length  max_lazy_match
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (1..9), or 0 for stored blocks only */
+    int strategy; /* favor or force Huffman coding */
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+    uchf *l_buf;          /* buffer for literals or lengths */
+
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt last_lit;      /* running index in l_buf */
+
+    ushf *d_buf;
+    /* Buffer for distances. To simplify the code, d_buf and l_buf have
+     * the same number of elements. To use different lengths, an extra flag
+     * array would be necessary.
+     */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    int last_eob_len;   /* bit length of EOB code for last block */
+
+#ifdef DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream: store c at pending_buf[pending] and advance
+ * pending. s is evaluated twice. IN assertion: there is enough room in
+ * pending_buf. */
+#define put_byte(s, c) {(s)->pending_buf[(s)->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
+        /* in trees.c */
+void _tr_init         OF((deflate_state *s));
+int  _tr_tally        OF((deflate_state *s, unsigned dist, unsigned lc));
+void _tr_flush_block  OF((deflate_state *s, charf *buf, ulg stored_len,
+			  int eof));
+void _tr_align        OF((deflate_state *s));
+void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
+                          int eof));
+
+#define d_code(dist) \
+   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects. _dist_code[256] and _dist_code[257] are never
+ * used.
+ */
+
+#ifndef DEBUG
+/* Inline versions of _tr_tally; flush is set when last_lit hits lit_bufsize-1 */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch _length_code[];
+  extern uch _dist_code[];
+#else
+  extern const uch _length_code[];
+  extern const uch _dist_code[];
+#endif
+
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0; \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (length); \
+    ush dist = (distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else /* DEBUG: go through the real _tr_tally() function instead */
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length) 
+#endif
+
+#endif
diff --git a/ZLIB/gzio.c b/ZLIB/gzio.c
new file mode 100644
index 0000000..9c5b9ca
--- /dev/null
+++ b/ZLIB/gzio.c
@@ -0,0 +1,875 @@
+/* gzio.c -- IO on .gz files
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Compile this file with -DNO_DEFLATE to avoid the compression code.
+ */
+
+/* @(#) $Id: gzio.c,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#include <stdio.h>
+
+#include "zutil.h"
+
+struct internal_state {int dummy;}; /* for buggy compilers */
+
+#ifndef Z_BUFSIZE
+#  ifdef MAXSEG_64K
+#    define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */
+#  else
+#    define Z_BUFSIZE 16384
+#  endif
+#endif
+#ifndef Z_PRINTF_BUFSIZE
+#  define Z_PRINTF_BUFSIZE 4096
+#endif
+
+#define ALLOC(size) malloc(size)
+#define TRYFREE(p) {if (p) free(p);}
+
+static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
+#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define RESERVED     0xE0 /* bits 5..7: reserved */
+
+typedef struct gz_stream {
+    z_stream stream;  /* zlib stream state (next_in/avail_in, next_out/...) */
+    int      z_err;   /* error code for last stream operation */
+    int      z_eof;   /* set if end of input file */
+    FILE     *file;   /* .gz file */
+    Byte     *inbuf;  /* input buffer (Z_NULL until allocated) */
+    Byte     *outbuf; /* output buffer (Z_NULL until allocated) */
+    uLong    crc;     /* crc32 of uncompressed data */
+    char     *msg;    /* error message built by gzerror, freed by destroy */
+    char     *path;   /* copy of the path name; prefixes gzerror messages */
+    int      transparent; /* 1 if input file is not a .gz file */
+    char     mode;    /* 'w' or 'r' ('\0' while gz_open parses the mode) */
+    long     startpos; /* start of compressed data in file (header skipped) */
+} gz_stream;
+
+
+local gzFile gz_open      OF((const char *path, const char *mode, int  fd));
+local int do_flush        OF((gzFile file, int flush));
+local int    get_byte     OF((gz_stream *s));
+local void   check_header OF((gz_stream *s));
+local int    destroy      OF((gz_stream *s));
+local void   putLong      OF((FILE *file, uLong x));
+local uLong  getLong      OF((gz_stream *s));
+
+/* ===========================================================================
+     Opens a gzip (.gz) file for reading or writing. The mode parameter is as
+   in fopen ("rb" or "wb"), optionally with a compression level digit and a
+   strategy letter ('f' or 'h'). The file is given by fd, or by path if fd < 0.
+     gz_open returns NULL if the mode is invalid or does not fit fmode, if the
+   file could not be opened, or if there was insufficient memory; errno can be
+   checked to distinguish the last two cases (if errno is zero, the zlib
+   error is Z_MEM_ERROR).
+*/
+local gzFile gz_open (path, mode, fd)
+    const char *path;
+    const char *mode;
+    int  fd;
+{
+    int err;
+    int level = Z_DEFAULT_COMPRESSION; /* compression level */
+    int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */
+    char *p = (char*)mode;
+    gz_stream *s;
+    char fmode[80]; /* copy of mode, without the compression level */
+    char *m = fmode;
+
+    if (!path || !mode) return Z_NULL;
+
+    s = (gz_stream *)ALLOC(sizeof(gz_stream));
+    if (!s) return Z_NULL;
+
+    s->stream.zalloc = (alloc_func)0;
+    s->stream.zfree = (free_func)0;
+    s->stream.opaque = (voidpf)0;
+    s->stream.state = Z_NULL; /* destroy() tests it before any *Init2 call */
+    s->stream.next_in = s->inbuf = Z_NULL;
+    s->stream.next_out = s->outbuf = Z_NULL;
+    s->stream.avail_in = s->stream.avail_out = 0;
+    s->file = NULL;
+    s->z_err = Z_OK;
+    s->z_eof = 0;
+    s->crc = crc32(0L, Z_NULL, 0);
+    s->msg = NULL;
+    s->transparent = 0;
+    s->mode = '\0'; /* unknown until the mode string is parsed below */
+
+    s->path = (char*)ALLOC(strlen(path)+1);
+    if (s->path == NULL) {
+        return destroy(s), (gzFile)Z_NULL;
+    }
+    strcpy(s->path, path); /* do this early for debugging */
+
+    /* Split mode into s->mode, level, strategy and the fopen mode (fmode). */
+    do {
+        if (*p == 'r') s->mode = 'r';
+        if (*p == 'w' || *p == 'a') s->mode = 'w';
+        if (*p >= '0' && *p <= '9') {
+            level = *p - '0';
+        } else if (*p == 'f') {
+            strategy = Z_FILTERED;
+        } else if (*p == 'h') {
+            strategy = Z_HUFFMAN_ONLY;
+        } else {
+            *m++ = *p; /* copy the mode */
+        }
+    } while (*p++ && m != fmode + sizeof(fmode));
+    /* m[-1] is the copied terminator unless mode was too long for fmode. */
+    if (s->mode == '\0' || m[-1] != '\0') return destroy(s), (gzFile)Z_NULL;
+
+    if (s->mode == 'w') {
+#ifdef NO_DEFLATE
+        err = Z_STREAM_ERROR;
+#else
+        err = deflateInit2(&(s->stream), level,
+                           Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy);
+        /* windowBits is passed < 0 to suppress zlib header */
+
+        s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
+#endif
+        if (err != Z_OK || s->outbuf == Z_NULL) {
+            return destroy(s), (gzFile)Z_NULL;
+        }
+    } else {
+        s->stream.next_in  = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);
+
+        err = inflateInit2(&(s->stream), -MAX_WBITS);
+        /* windowBits is passed < 0 to tell that there is no zlib header.
+         * Note that in this case inflate *requires* an extra "dummy" byte
+         * after the compressed stream in order to complete decompression and
+         * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
+         * present after the compressed stream.
+         */
+        if (err != Z_OK || s->inbuf == Z_NULL) {
+            return destroy(s), (gzFile)Z_NULL;
+        }
+    }
+    s->stream.avail_out = Z_BUFSIZE;
+
+    errno = 0;
+    s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode);
+
+    if (s->file == NULL) {
+        return destroy(s), (gzFile)Z_NULL;
+    }
+    if (s->mode == 'w') {
+        /* Write a very simple .gz header: */
+        fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1],
+             Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE);
+        s->startpos = 10L;
+        /* 10L rather than ftell(s->file) because ftell causes an fflush on
+         * some systems; write mode does not use startpos here anyway.
+         */
+    } else {
+        check_header(s); /* skip the .gz header */
+        s->startpos = (ftell(s->file) - s->stream.avail_in);
+    }
+
+    return (gzFile)s;
+}
+
+/* ===========================================================================
+     Opens the gzip (.gz) file named path via gz_open, passing -1 as the fd.
+*/
+gzFile ZEXPORT gzopen (path, mode)
+    const char *path;
+    const char *mode;
+{
+    return gz_open (path, mode, -1);
+}
+
+/* ===========================================================================
+     Associate a gzFile with the file descriptor fd. fd is not dup'ed here
+   to mimic the behavio(u)r of fdopen. Returns NULL if fd < 0 or gz_open fails.
+*/
+gzFile ZEXPORT gzdopen (fd, mode)
+    int fd;
+    const char *mode;
+{
+    char name[46]; /* "<fd:%d>" with room for up to 128-bit integers */
+
+    if (fd < 0) return (gzFile)Z_NULL;
+    sprintf(name, "<fd:%d>", fd); /* for debugging */
+
+    return gz_open (name, mode, fd);
+}
+
+/* ===========================================================================
+ * Change the compression level and strategy of a file opened for writing.
+ */
+int ZEXPORT gzsetparams (file, level, strategy)
+    gzFile file;
+    int level;
+    int strategy;
+{
+    gz_stream *gz = (gz_stream*)file;
+
+    if (gz == NULL) return Z_STREAM_ERROR;
+    if (gz->mode != 'w') return Z_STREAM_ERROR;
+
+    if (gz->stream.avail_out == 0) {
+        /* Output buffer is full: write it out to make room for flushing. */
+        size_t written = fwrite(gz->outbuf, 1, Z_BUFSIZE, gz->file);
+
+        if (written != Z_BUFSIZE) gz->z_err = Z_ERRNO;
+        gz->stream.next_out = gz->outbuf;
+        gz->stream.avail_out = Z_BUFSIZE;
+    }
+
+    return deflateParams (&(gz->stream), level, strategy);
+}
+
+/* ===========================================================================
+     Read one byte from a gz_stream opened for reading, refilling inbuf when
+   it is empty and updating next_in/avail_in. Return EOF at end of file.
+*/
+local int get_byte(s)
+    gz_stream *s;
+{
+    z_streamp strm = &(s->stream);
+    if (s->z_eof) return EOF;
+    if (strm->avail_in == 0) {
+        errno = 0;
+        strm->avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file);
+        if (strm->avail_in == 0) {
+            s->z_eof = 1;
+            if (ferror(s->file)) s->z_err = Z_ERRNO;
+            return EOF;
+        }
+        strm->next_in = s->inbuf;
+    }
+    strm->avail_in--;
+    return *(strm->next_in)++;
+}
+
+/* ===========================================================================
+      Check the gzip header of a gz_stream opened for reading. Set the stream
+    mode to transparent if the gzip magic header is not present; set s->z_err
+    to Z_DATA_ERROR if the magic header is present but the rest of the header
+    is incorrect.
+    IN assertion: the stream s has already been created successfully;
+       s->stream.avail_in is zero for the first time, but may be non-zero
+       for concatenated .gz files.
+*/
+local void check_header(s)
+    gz_stream *s;
+{
+    int method; /* method byte */
+    int flags;  /* flags byte */
+    uInt len;
+    int c;
+
+    /* Check the gzip magic header; on a mismatch, un-read the bytes consumed */
+    for (len = 0; len < 2; len++) {
+	c = get_byte(s);
+	if (c != gz_magic[len]) {
+	    if (len != 0) s->stream.avail_in++, s->stream.next_in--;
+	    if (c != EOF) {
+		s->stream.avail_in++, s->stream.next_in--;
+		s->transparent = 1; /* data present but no gzip magic */
+	    }
+	    s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END;
+	    return;
+	}
+    }
+    method = get_byte(s);
+    flags = get_byte(s);
+    if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
+	s->z_err = Z_DATA_ERROR;
+	return;
+    }
+
+    /* Discard time, xflags and OS code: */
+    for (len = 0; len < 6; len++) (void)get_byte(s);
+
+    if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
+	len  =  (uInt)get_byte(s);
+	len += ((uInt)get_byte(s))<<8;
+	/* len is garbage if EOF but the loop below will quit anyway */
+	while (len-- != 0 && get_byte(s) != EOF) ;
+    }
+    if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
+	while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
+	while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
+	for (len = 0; len < 2; len++) (void)get_byte(s);
+    }
+    s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK;
+}
+
+ /* ===========================================================================
+ * Cleanup then free the given gz_stream. Return a zlib error code (a negative
+   s->z_err takes precedence). Try freeing in the reverse order of allocations.
+ */
+local int destroy (s)
+    gz_stream *s;
+{
+    int err = Z_OK;
+
+    if (!s) return Z_STREAM_ERROR;
+
+    TRYFREE(s->msg);
+    /* End the zlib stream only if it has internal state. */
+    if (s->stream.state != NULL) {
+	if (s->mode == 'w') {
+#ifdef NO_DEFLATE
+	    err = Z_STREAM_ERROR;
+#else
+	    err = deflateEnd(&(s->stream));
+#endif
+	} else if (s->mode == 'r') {
+	    err = inflateEnd(&(s->stream));
+	}
+    }
+    if (s->file != NULL && fclose(s->file)) {
+#ifdef ESPIPE
+	if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */
+#endif
+	    err = Z_ERRNO;
+    }
+    if (s->z_err < 0) err = s->z_err; /* a recorded stream error wins */
+
+    TRYFREE(s->inbuf);
+    TRYFREE(s->outbuf);
+    TRYFREE(s->path);
+    TRYFREE(s);
+    return err;
+}
+
+/* ===========================================================================
+     Reads up to len uncompressed bytes from the compressed file into buf.
+   gzread returns the number of bytes actually read (0 for end of file).
+*/
+int ZEXPORT gzread (file, buf, len)
+    gzFile file;
+    voidp buf;
+    unsigned len;
+{
+    gz_stream *s = (gz_stream*)file;
+    Bytef *start = (Bytef*)buf; /* starting point for crc computation */
+    Byte  *next_out; /* == stream.next_out but not forced far (for MSDOS) */
+
+    if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;
+    /* A pending data or I/O error is reported as -1. */
+    if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1;
+    if (s->z_err == Z_STREAM_END) return 0;  /* EOF */
+
+    next_out = (Byte*)buf;
+    s->stream.next_out = (Bytef*)buf;
+    s->stream.avail_out = len;
+
+    while (s->stream.avail_out != 0) {
+
+	if (s->transparent) {
+	    /* Copy first the lookahead bytes: */
+	    uInt n = s->stream.avail_in;
+	    if (n > s->stream.avail_out) n = s->stream.avail_out;
+	    if (n > 0) {
+		zmemcpy(s->stream.next_out, s->stream.next_in, n);
+		next_out += n;
+		s->stream.next_out = next_out;
+		s->stream.next_in   += n;
+		s->stream.avail_out -= n;
+		s->stream.avail_in  -= n;
+	    }
+	    if (s->stream.avail_out > 0) {
+		s->stream.avail_out -= fread(next_out, 1, s->stream.avail_out,
+					     s->file);
+	    }
+	    len -= s->stream.avail_out;
+	    s->stream.total_in  += (uLong)len;
+	    s->stream.total_out += (uLong)len;
+            if (len == 0) s->z_eof = 1;
+	    return (int)len;
+	}
+        if (s->stream.avail_in == 0 && !s->z_eof) {
+            /* Input buffer empty: refill it from the file. */
+            errno = 0;
+            s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file);
+            if (s->stream.avail_in == 0) {
+                s->z_eof = 1;
+		if (ferror(s->file)) {
+		    s->z_err = Z_ERRNO;
+		    break;
+		}
+            }
+            s->stream.next_in = s->inbuf;
+        }
+        s->z_err = inflate(&(s->stream), Z_NO_FLUSH);
+
+	if (s->z_err == Z_STREAM_END) {
+	    /* Check CRC and original size */
+	    s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
+	    start = s->stream.next_out;
+
+	    if (getLong(s) != s->crc) {
+		s->z_err = Z_DATA_ERROR;
+	    } else {
+	        (void)getLong(s);
+                /* The uncompressed length returned by above getlong() may
+                 * be different from s->stream.total_out) in case of
+		 * concatenated .gz files. Check for such files:
+		 */
+		check_header(s);
+		if (s->z_err == Z_OK) {
+		    uLong total_in = s->stream.total_in;
+		    uLong total_out = s->stream.total_out;
+
+		    inflateReset(&(s->stream));
+		    s->stream.total_in = total_in;
+		    s->stream.total_out = total_out;
+		    s->crc = crc32(0L, Z_NULL, 0);
+		}
+	    }
+	}
+	if (s->z_err != Z_OK || s->z_eof) break;
+    }
+    s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
+
+    return (int)(len - s->stream.avail_out);
+}
+
+
+/* ===========================================================================
+      Reads a single byte from the compressed file through gzread. gzgetc
+   returns that byte, or -1 in case of end of file or error.
+*/
+int ZEXPORT gzgetc(file)
+    gzFile file;
+{
+    unsigned char byte;
+    if (gzread(file, &byte, 1) != 1) return -1;
+    return byte;
+}
+
+
+/* ===========================================================================
+      Reads bytes from the compressed file into buf, one gzread call per
+   byte, stopping after len-1 characters, after a newline (which is stored
+   in buf), or when gzread stops delivering bytes.  The string is then
+   terminated with a null character.
+      gzgets returns buf, or Z_NULL if buf is Z_NULL, len <= 0, or nothing
+   could be read although there was room for at least one character.
+      The current implementation is not optimized at all.
+*/
+char * ZEXPORT gzgets(file, buf, len)
+    gzFile file;
+    char *buf;
+    int len;
+{
+    char *start = buf;
+    if (buf == Z_NULL || len <= 0) return Z_NULL;
+    for (; --len > 0 && gzread(file, buf, 1) == 1; ) if (*buf++ == '\n') break;
+    *buf = '\0';
+    if (start == buf && len > 0) return Z_NULL; /* had room, read nothing */
+    return start;
+}
+
+
+#ifndef NO_DEFLATE
+/* ===========================================================================
+     Writes len uncompressed bytes from buf into the compressed file. Returns
+   the number of bytes consumed (may be less than len after an error).
+*/
+int ZEXPORT gzwrite (file, buf, len)
+    gzFile file;
+    const voidp buf;
+    unsigned len;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
+
+    s->stream.next_in = (Bytef*)buf;
+    s->stream.avail_in = len;
+
+    while (s->stream.avail_in != 0) {
+
+        if (s->stream.avail_out == 0) {
+            /* Output buffer full: write it to the file and reuse it. */
+            s->stream.next_out = s->outbuf;
+            if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
+                s->z_err = Z_ERRNO;
+                break;
+            }
+            s->stream.avail_out = Z_BUFSIZE;
+        }
+        s->z_err = deflate(&(s->stream), Z_NO_FLUSH);
+        if (s->z_err != Z_OK) break;
+    }
+    s->crc = crc32(s->crc, (const Bytef *)buf, len); /* always over all len bytes */
+
+    return (int)(len - s->stream.avail_in);
+}
+
+/* ===========================================================================
+     Converts, formats, and writes the args to the compressed file under
+   control of the format string, as in fprintf. gzprintf returns the number of
+   uncompressed bytes actually written (0 in case of error).
+*/
+#ifdef STDC
+#include <stdarg.h>
+
+int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...)
+{
+    char buf[Z_PRINTF_BUFSIZE]; /* formatted output, terminator included */
+    va_list va;
+    int len;
+    /* Returns 0, writing nothing, if the output is empty or does not fit buf. */
+    buf[sizeof(buf) - 1] = 0; /* sentinel: nonzero afterwards means buf overran */
+    va_start(va, format);
+#ifdef HAS_vsnprintf
+    (void)vsnprintf(buf, sizeof(buf), format, va);
+#else
+    (void)vsprintf(buf, format, va); /* unbounded: an overrun is only detected */
+#endif
+    va_end(va);
+    for (len = 0; len < (int)sizeof(buf) && buf[len] != 0; len++) ; /* bounded strlen */
+    if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0) return 0;
+    return gzwrite(file, buf, (unsigned)len);
+}
+#else /* not ANSI C */
+
+int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+	               a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
+    gzFile file;
+    const char *format;
+    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+	a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
+{
+    char buf[Z_PRINTF_BUFSIZE];
+    int len;
+    buf[sizeof(buf) - 1] = 0; /* sentinel: nonzero afterwards means buf overran */
+#ifdef HAS_snprintf
+    snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
+	     a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#else
+    sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
+	    a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#endif
+    /* Bounded length scan; empty or non-fitting output is an error (0). */
+    for (len = 0; len < (int)sizeof(buf) && buf[len] != 0; len++) ;
+    if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0) return 0;
+    return gzwrite(file, buf, len);
+}
+#endif
+
+/* ===========================================================================
+      Writes c, converted to an unsigned char, into the compressed file
+   through gzwrite. gzputc returns the byte written, or -1 in case of error.
+*/
+int ZEXPORT gzputc(file, c)
+    gzFile file;
+    int c;
+{
+    unsigned char byte = (unsigned char) c; /* required for big endian systems */
+    if (gzwrite(file, &byte, 1) != 1) return -1;
+    return (int)byte;
+}
+
+
+/* ===========================================================================
+      Writes the null-terminated string s, without its terminating null
+   character, to the compressed file and returns the result of gzwrite.
+*/
+int ZEXPORT gzputs(file, s)
+    gzFile file;
+    const char *s;
+{
+    unsigned count = (unsigned)strlen(s);
+    return gzwrite(file, (char*)s, count);
+}
+
+
+/* ===========================================================================
+     Flushes all pending output into the compressed file; flush is as in the
+   deflate() function. Returns Z_OK (also for Z_STREAM_END) or an error code.
+*/
+local int do_flush (file, flush)
+    gzFile file;
+    int flush;
+{
+    uInt len;
+    int done = 0;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
+
+    s->stream.avail_in = 0; /* should be zero already anyway */
+
+    for (;;) {
+        len = Z_BUFSIZE - s->stream.avail_out;
+        /* Write out whatever is waiting in the output buffer. */
+        if (len != 0) {
+            if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) {
+                s->z_err = Z_ERRNO;
+                return Z_ERRNO;
+            }
+            s->stream.next_out = s->outbuf;
+            s->stream.avail_out = Z_BUFSIZE;
+        }
+        if (done) break;
+        s->z_err = deflate(&(s->stream), flush);
+
+	/* Ignore the second of two consecutive flushes: */
+	if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK;
+
+        /* deflate has finished flushing only when it hasn't used up
+         * all the available space in the output buffer:
+         */
+        done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
+ 
+        if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
+    }
+    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
+}
+
+int ZEXPORT gzflush (file, flush)
+     gzFile file;
+     int flush;
+{
+    gz_stream *gz = (gz_stream*)file;
+    int status = do_flush (file, flush); /* deflate and write pending output */
+
+    if (status != Z_OK) return status;
+    fflush(gz->file); /* flush the stdio buffer as well */
+    if (gz->z_err == Z_STREAM_END) return Z_OK;
+    return gz->z_err;
+}
+#endif /* NO_DEFLATE */
+
+/* ===========================================================================
+      Sets the starting position for the next gzread or gzwrite on the given
+   compressed file. The offset represents a number of uncompressed bytes.
+      gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error (including
+   failure to allocate the scratch buffer). SEEK_END is not implemented,
+   returns error. In this version of the library, gzseek can be extremely slow.
+*/
+z_off_t ZEXPORT gzseek (file, offset, whence)
+    gzFile file;
+    z_off_t offset;
+    int whence;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || whence == SEEK_END ||
+        s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) {
+        return -1L;
+    }
+
+    if (s->mode == 'w') {
+#ifdef NO_DEFLATE
+        return -1L;
+#else
+        if (whence == SEEK_SET) {
+            offset -= s->stream.total_in;
+        }
+        if (offset < 0) return -1L;
+
+        /* At this point, offset is the number of zero bytes to write. */
+        if (s->inbuf == Z_NULL) {
+            s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */
+            if (s->inbuf == Z_NULL) return -1L;
+            zmemzero(s->inbuf, Z_BUFSIZE);
+        }
+        while (offset > 0)  {
+            uInt size = offset < Z_BUFSIZE ? (uInt)offset : Z_BUFSIZE;
+
+            size = gzwrite(file, s->inbuf, size);
+            if (size == 0) return -1L;
+
+            offset -= size;
+        }
+        return (z_off_t)s->stream.total_in;
+#endif
+    }
+    /* Rest of function is for reading only */
+
+    /* compute absolute position */
+    if (whence == SEEK_CUR) {
+        offset += s->stream.total_out;
+    }
+    if (offset < 0) return -1L;
+
+    if (s->transparent) {
+        /* map to fseek */
+        s->stream.avail_in = 0;
+        s->stream.next_in = s->inbuf;
+        if (fseek(s->file, offset, SEEK_SET) < 0) return -1L;
+
+        s->stream.total_in = s->stream.total_out = (uLong)offset;
+        return offset;
+    }
+
+    /* For a negative seek, rewind and use positive seek */
+    if ((uLong)offset >= s->stream.total_out) {
+        offset -= s->stream.total_out;
+    } else if (gzrewind(file) < 0) {
+        return -1L;
+    }
+    /* offset is now the number of bytes to skip. */
+
+    if (offset != 0 && s->outbuf == Z_NULL) {
+        s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); /* scratch space for skipped data */
+        if (s->outbuf == Z_NULL) return -1L;
+    }
+    while (offset > 0)  {
+        int size = offset < Z_BUFSIZE ? (int)offset : Z_BUFSIZE;
+
+        size = gzread(file, s->outbuf, (uInt)size);
+        if (size <= 0) return -1L;
+        offset -= size;
+    }
+    return (z_off_t)s->stream.total_out;
+}
+
+/* ===========================================================================
+     Rewinds a file opened for reading. Returns 0 if successful, else non-zero.
+*/
+int ZEXPORT gzrewind (file)
+    gzFile file;
+{
+    gz_stream *gz = (gz_stream*)file;
+
+    if (gz == NULL) return -1;
+    if (gz->mode != 'r') return -1;
+    /* Forget any error/EOF state and discard buffered input. */
+    gz->z_err = Z_OK;
+    gz->z_eof = 0;
+    gz->stream.next_in = gz->inbuf;
+    gz->stream.avail_in = 0;
+    gz->crc = crc32(0L, Z_NULL, 0);
+
+    if (gz->startpos != 0) {
+        (void) inflateReset(&gz->stream);
+        return fseek(gz->file, gz->startpos, SEEK_SET);
+    }
+    rewind(gz->file); /* not a compressed file */
+    return 0;
+}
+
+/* ===========================================================================
+     Returns the starting position for the next gzread or gzwrite on the
+   given compressed file, as a number of bytes in the uncompressed data
+   stream. Implemented as a zero-offset gzseek from the current position.
+*/
+z_off_t ZEXPORT gztell (file)
+    gzFile file;
+{
+    return gzseek(file, 0L, SEEK_CUR);
+}
+
+/* ===========================================================================
+     Returns the stream's end-of-file flag (set once reading has hit the end
+   of the input); always zero if file is NULL or not opened for reading.
+*/
+int ZEXPORT gzeof (file)
+    gzFile file;
+{
+    gz_stream *gz = (gz_stream*)file;
+    if (gz != NULL && gz->mode == 'r') return gz->z_eof;
+    return 0;
+}
+
+/* ===========================================================================
+   Writes the low 32 bits of x to the given file as four bytes, LSB first.
+*/
+local void putLong (file, x)
+    FILE *file;
+    uLong x;
+{
+    int shift;
+    /* Least significant byte first. */
+    for (shift = 0; shift < 32; shift += 8) {
+        fputc((int)((x >> shift) & 0xff), file);
+    }
+}
+
+/* ===========================================================================
+   Reads a 4-byte value, least significant byte first, from the given
+   gz_stream. Sets z_err to Z_DATA_ERROR if the last byte read is EOF.
+*/
+local uLong getLong (s)
+    gz_stream *s;
+{
+    uLong value;
+    int last;
+
+    value  = (uLong)get_byte(s);
+    value += ((uLong)get_byte(s))<<8;
+    value += ((uLong)get_byte(s))<<16;
+    last = get_byte(s);
+    if (last == EOF) s->z_err = Z_DATA_ERROR; /* input ended inside the value */
+    return value + (((uLong)last)<<24);
+}
+
+/* ===========================================================================
+     Flushes all pending output if necessary, closes the compressed file and
+   deallocates all the (de)compression state. Returns a zlib error code.
+*/
+int ZEXPORT gzclose (file)
+    gzFile file;
+{
+    int err;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL) return Z_STREAM_ERROR;
+
+    if (s->mode == 'w') {
+#ifdef NO_DEFLATE
+	return Z_STREAM_ERROR;
+#else
+        err = do_flush (file, Z_FINISH);
+        if (err != Z_OK) return destroy((gz_stream*)file);
+        /* Trailer: the CRC, then s->stream.total_in, each in LSB order. */
+        putLong (s->file, s->crc);
+        putLong (s->file, s->stream.total_in);
+#endif
+    }
+    return destroy((gz_stream*)file);
+}
+
+/* ===========================================================================
+     Returns the error message for the last error which occurred on the
+   given compressed file. errnum is set to zlib error number. If an
+   error occurred in the file system and not in the compression library,
+   errnum is set to Z_ERRNO and the application may consult errno. If the
+   message cannot be allocated, the Z_MEM_ERROR message is returned instead.
+*/
+const char*  ZEXPORT gzerror (file, errnum)
+    gzFile file;
+    int *errnum;
+{
+    char *m;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL) {
+        *errnum = Z_STREAM_ERROR;
+        return (const char*)ERR_MSG(Z_STREAM_ERROR);
+    }
+    *errnum = s->z_err;
+    if (*errnum == Z_OK) return (const char*)"";
+
+    m =  (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg);
+    if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err);
+    /* Rebuild the "path: message" string owned by the stream. */
+    TRYFREE(s->msg);
+    s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3);
+    if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR);
+    strcpy(s->msg, s->path);
+    strcat(s->msg, ": ");
+    strcat(s->msg, m);
+    return (const char*)s->msg;
+}
diff --git a/ZLIB/infblock.c b/ZLIB/infblock.c
new file mode 100644
index 0000000..dd7a6d4
--- /dev/null
+++ b/ZLIB/infblock.c
@@ -0,0 +1,403 @@
+/* infblock.c -- interpret and process block types to last block
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "infblock.h"
+#include "inftrees.h"
+#include "infcodes.h"
+#include "infutil.h"
+
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define exop word.what.Exop     /* h->exop reads h->word.what.Exop */
+#define bits word.what.Bits     /* h->bits reads h->word.what.Bits */
+
+/* Table for deflate from PKZIP's appnote.txt. */
+local const uInt border[] = { /* Order of the bit length code lengths */
+        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/*
+   Notes beyond the 1.93a appnote.txt:
+
+   1. Distance pointers never point before the beginning of the output
+      stream.
+   2. Distance pointers can point back across blocks, up to 32k away.
+   3. There is an implied maximum of 7 bits for the bit length table and
+      15 bits for the actual data.
+   4. If only one code exists, then it is encoded using one bit.  (Zero
+      would be more efficient, but perhaps a little confusing.)  If two
+      codes exist, they are coded using one bit each (0 and 1).
+   5. There is no way of sending zero distance codes--a dummy must be
+      sent if there are none.  (History: a pre 2.0 version of PKZIP would
+      store blocks with no distance codes, but this was discovered to be
+      too harsh a criterion.)  Valid only for 1.93a.  2.04c does allow
+      zero distance codes, which is sent as one code of zero bits in
+      length.
+   6. There are up to 286 literal/length codes.  Code 256 represents the
+      end-of-block.  Note however that the static length tree defines
+      288 codes just to fill out the Huffman codes.  Codes 286 and 287
+      cannot be used though, since there is no length base or extra bits
+      defined for them.  Similarly, there are up to 30 distance codes.
+      However, static trees define 32 codes (all 5 bits) to fill out the
+      Huffman codes, but the last two had better not show up in the data.
+   7. Unzip can check dynamic Huffman blocks for complete code sets.
+      The exception is that a single code would not be complete (see #4).
+   8. The five bits following the block type is really the number of
+      literal codes sent minus 257.
+   9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+      (1+6+6).  Therefore, to output three times the length, you output
+      three codes (1+1+1), whereas to output four times the same length,
+      you only need two codes (1+3).  Hmm.
+  10. In the tree reconstruction algorithm, Code = Code + Increment
+      only if BitLength(i) is not zero.  (Pretty obvious.)
+  11. Correction: 4 Bits: # of Bit Length codes - 4     (4 - 19)
+  12. Note: length code 284 can represent 227-258, but length code 285
+      really is 258.  The last length deserves its own, short code
+      since it gets used a lot in very redundant files.  The length
+      258 is special since 258 - 3 (the min match length) is 255.
+  13. The literal/length and distance code bit lengths are read as a
+      single stream of lengths.  It is possible (and advantageous) for
+      a repeat code (16, 17, or 18) to go across the boundary between
+      the two sets of lengths.
+ */
+
+
+void inflate_blocks_reset(s, z, c)
+inflate_blocks_statef *s;       /* block state to rewind to TYPE */
+z_streamp z;                    /* stream passed to ZFREE; adler is updated */
+uLongf *c;                      /* check value on output, or Z_NULL */
+{
+  if (c != Z_NULL) *c = s->check;
+  switch (s->mode) {            /* free what the interrupted mode owns */
+    case BTREE: case DTREE: ZFREE(z, s->sub.trees.blens); break;
+    case CODES: inflate_codes_free(s->sub.decode.codes, z); break;
+    default: break;
+  }
+  s->mode = TYPE;
+  s->bitb = 0;
+  s->bitk = 0;
+  s->read = s->write = s->window;
+  if (s->checkfn != Z_NULL)
+    z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0);
+  Tracev((stderr, "inflate:   blocks reset\n"));
+}
+
+
+inflate_blocks_statef *inflate_blocks_new(z, c, w)
+z_streamp z;                    /* stream passed to ZALLOC/ZFREE */
+check_func c;                   /* check function */
+uInt w;                         /* window size */
+{
+  inflate_blocks_statef *blk;
+  /* Three allocations in turn (state, huft pool, window); a failure
+     releases the earlier ones and yields Z_NULL. */
+  blk = (inflate_blocks_statef *)
+        ZALLOC(z, 1, sizeof(struct inflate_blocks_state));
+  if (blk == Z_NULL) return Z_NULL;
+  blk->hufts = (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY);
+  if (blk->hufts == Z_NULL) {
+    ZFREE(z, blk);
+    return Z_NULL;
+  }
+  blk->window = (Bytef *)ZALLOC(z, 1, w);
+  if (blk->window == Z_NULL) {
+    ZFREE(z, blk->hufts);
+    ZFREE(z, blk);
+    return Z_NULL;
+  }
+  blk->end = blk->window + w;
+  blk->checkfn = c;
+  blk->mode = TYPE;             /* the reset below inspects mode, so set it first */
+  Tracev((stderr, "inflate:   blocks allocated\n"));
+  inflate_blocks_reset(blk, z, Z_NULL);
+  return blk;
+}
+
+
+int inflate_blocks(s, z, r)     /* advance the block-level state machine */
+inflate_blocks_statef *s;       /* block decoder state (mode, window, sub-state) */
+z_streamp z;                    /* stream; z->msg is set on data errors */
+int r;                          /* initial return code */
+{
+  uInt t;               /* temporary storage */
+  uLong b;              /* bit buffer */
+  uInt k;               /* bits in bit buffer */
+  Bytef *p;             /* input data pointer */
+  uInt n;               /* bytes available there */
+  Bytef *q;             /* output window write pointer */
+  uInt m;               /* bytes to end of window or read pointer */
+
+  /* copy input/output information to locals (UPDATE macro restores) */
+  LOAD
+
+  /* process input based on current state */
+  while (1) switch (s->mode)
+  {
+    case TYPE:                          /* read the 3-bit block header */
+      NEEDBITS(3)       /* NEEDBITS/DUMPBITS/LEAVE: presumably from infutil.h (not shown) */
+      t = (uInt)b & 7;
+      s->last = t & 1;                  /* low bit marks the last block */
+      switch (t >> 1)                   /* remaining two bits: block type */
+      {
+        case 0:                         /* stored */
+          Tracev((stderr, "inflate:     stored block%s\n",
+                 s->last ? " (last)" : ""));
+          DUMPBITS(3)
+          t = k & 7;                    /* go to byte boundary */
+          DUMPBITS(t)
+          s->mode = LENS;               /* get length of stored block */
+          break;
+        case 1:                         /* fixed */
+          Tracev((stderr, "inflate:     fixed codes block%s\n",
+                 s->last ? " (last)" : ""));
+          {
+            uInt bl, bd;
+            inflate_huft *tl, *td;
+
+            inflate_trees_fixed(&bl, &bd, &tl, &td, z);
+            s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
+            if (s->sub.decode.codes == Z_NULL)
+            {
+              r = Z_MEM_ERROR;          /* mode stays TYPE, header bits not yet dumped */
+              LEAVE
+            }
+          }
+          DUMPBITS(3)
+          s->mode = CODES;
+          break;
+        case 2:                         /* dynamic */
+          Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                 s->last ? " (last)" : ""));
+          DUMPBITS(3)
+          s->mode = TABLE;
+          break;
+        case 3:                         /* illegal */
+          DUMPBITS(3)
+          s->mode = BAD;
+          z->msg = (char*)"invalid block type";
+          r = Z_DATA_ERROR;
+          LEAVE
+      }
+      break;
+    case LENS:                          /* stored block: length word and its check */
+      NEEDBITS(32)
+      if ((((~b) >> 16) & 0xffff) != (b & 0xffff))  /* high half must complement low half */
+      {
+        s->mode = BAD;
+        z->msg = (char*)"invalid stored block lengths";
+        r = Z_DATA_ERROR;
+        LEAVE
+      }
+      s->sub.left = (uInt)b & 0xffff;
+      b = k = 0;                      /* dump bits */
+      Tracev((stderr, "inflate:       stored length %u\n", s->sub.left));
+      s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE);
+      break;
+    case STORED:                        /* copy stored bytes from input to window */
+      if (n == 0)
+        LEAVE
+      NEEDOUT
+      t = s->sub.left;                  /* clamp the copy to input and output room */
+      if (t > n) t = n;
+      if (t > m) t = m;
+      zmemcpy(q, p, t);
+      p += t;  n -= t;
+      q += t;  m -= t;
+      if ((s->sub.left -= t) != 0)
+        break;
+      Tracev((stderr, "inflate:       stored end, %lu total out\n",
+              z->total_out + (q >= s->read ? q - s->read :
+              (s->end - s->read) + (q - s->window))));
+      s->mode = s->last ? DRY : TYPE;
+      break;
+    case TABLE:                         /* dynamic block: 14 bits of table sizes */
+      NEEDBITS(14)
+      s->sub.trees.table = t = (uInt)b & 0x3fff;
+#ifndef PKZIP_BUG_WORKAROUND
+      if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
+      {
+        s->mode = BAD;
+        z->msg = (char*)"too many length or distance symbols";
+        r = Z_DATA_ERROR;
+        LEAVE
+      }
+#endif
+      t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f);   /* total code lengths to hold */
+      if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
+      {
+        r = Z_MEM_ERROR;
+        LEAVE
+      }
+      DUMPBITS(14)
+      s->sub.trees.index = 0;
+      Tracev((stderr, "inflate:       table sizes ok\n"));
+      s->mode = BTREE;                  /* falls through */
+    case BTREE:                         /* read 3-bit lengths for the bit-length tree */
+      while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
+      {
+        NEEDBITS(3)
+        s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
+        DUMPBITS(3)
+      }
+      while (s->sub.trees.index < 19)   /* lengths not sent are zero */
+        s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
+      s->sub.trees.bb = 7;
+      t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
+                             &s->sub.trees.tb, s->hufts, z);
+      if (t != Z_OK)
+      {
+        r = t;
+        if (r == Z_DATA_ERROR)
+        {
+          ZFREE(z, s->sub.trees.blens); /* freed here; BAD mode keeps reset from freeing again */
+          s->mode = BAD;
+        }
+        LEAVE
+      }
+      s->sub.trees.index = 0;
+      Tracev((stderr, "inflate:       bits tree ok\n"));
+      s->mode = DTREE;                  /* falls through */
+    case DTREE:                         /* decode literal/length and distance code lengths */
+      while (t = s->sub.trees.table,
+             s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
+      {
+        inflate_huft *h;
+        uInt i, j, c;
+
+        t = s->sub.trees.bb;
+        NEEDBITS(t)
+        h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
+        t = h->bits;
+        c = h->base;
+        if (c < 16)                     /* a literal code length */
+        {
+          DUMPBITS(t)
+          s->sub.trees.blens[s->sub.trees.index++] = c;
+        }
+        else /* c == 16..18 */
+        {
+          i = c == 18 ? 7 : c - 14;     /* extra bits to read: 2, 3 or 7 */
+          j = c == 18 ? 11 : 3;         /* smallest repeat count */
+          NEEDBITS(t + i)
+          DUMPBITS(t)
+          j += (uInt)b & inflate_mask[i];
+          DUMPBITS(i)
+          i = s->sub.trees.index;
+          t = s->sub.trees.table;
+          if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
+              (c == 16 && i < 1))       /* overrun, or "repeat previous" with nothing before */
+          {
+            ZFREE(z, s->sub.trees.blens);
+            s->mode = BAD;
+            z->msg = (char*)"invalid bit length repeat";
+            r = Z_DATA_ERROR;
+            LEAVE
+          }
+          c = c == 16 ? s->sub.trees.blens[i - 1] : 0;  /* 16 repeats previous; 17/18 repeat zero */
+          do {
+            s->sub.trees.blens[i++] = c;
+          } while (--j);
+          s->sub.trees.index = i;
+        }
+      }
+      s->sub.trees.tb = Z_NULL;
+      {
+        uInt bl, bd;
+        inflate_huft *tl, *td;
+        inflate_codes_statef *c;
+
+        bl = 9;         /* must be <= 9 for lookahead assumptions */
+        bd = 6;         /* must be <= 9 for lookahead assumptions */
+        t = s->sub.trees.table;
+        t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
+                                  s->sub.trees.blens, &bl, &bd, &tl, &td,
+                                  s->hufts, z);
+        if (t != Z_OK)
+        {
+          if (t == (uInt)Z_DATA_ERROR)
+          {
+            ZFREE(z, s->sub.trees.blens);
+            s->mode = BAD;
+          }
+          r = t;
+          LEAVE
+        }
+        Tracev((stderr, "inflate:       trees ok\n"));
+        if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
+        {
+          r = Z_MEM_ERROR;              /* mode stays DTREE, blens still allocated */
+          LEAVE
+        }
+        s->sub.decode.codes = c;
+      }
+      ZFREE(z, s->sub.trees.blens);
+      s->mode = CODES;                  /* falls through */
+    case CODES:                         /* hand the block body to inflate_codes */
+      UPDATE
+      if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
+        return inflate_flush(s, z, r);  /* not at end of block: flush and report r */
+      r = Z_OK;
+      inflate_codes_free(s->sub.decode.codes, z);
+      LOAD
+      Tracev((stderr, "inflate:       codes end, %lu total out\n",
+              z->total_out + (q >= s->read ? q - s->read :
+              (s->end - s->read) + (q - s->window))));
+      if (!s->last)
+      {
+        s->mode = TYPE;
+        break;
+      }
+      s->mode = DRY;                    /* falls through */
+    case DRY:                           /* last block decoded; drain the window */
+      FLUSH
+      if (s->read != s->write)
+        LEAVE
+      s->mode = DONE;                   /* falls through */
+    case DONE:
+      r = Z_STREAM_END;
+      LEAVE
+    case BAD:                           /* stays here until reset */
+      r = Z_DATA_ERROR;
+      LEAVE
+    default:
+      r = Z_STREAM_ERROR;
+      LEAVE
+  }
+}
+
+
+int inflate_blocks_free(s, z)   /* tear down a block decoder; always returns Z_OK */
+inflate_blocks_statef *s;       /* state to destroy (window, hufts and s are freed) */
+z_streamp z;                    /* stream passed to ZFREE */
+{
+  inflate_blocks_reset(s, z, Z_NULL);   /* first release mode-specific sub-state */
+  ZFREE(z, s->window);
+  ZFREE(z, s->hufts);
+  ZFREE(z, s);
+  Tracev((stderr, "inflate:   blocks freed\n"));
+  return Z_OK;
+}
+
+
+void inflate_set_dictionary(s, d, n)    /* preload the window with n bytes from d */
+inflate_blocks_statef *s;       /* block state whose window is filled */
+const Bytef *d;                 /* dictionary */
+uInt  n;                        /* dictionary length */
+{
+  zmemcpy(s->window, d, n);     /* NOTE(review): no bound check here; presumably the caller keeps n within the window */
+  s->read = s->write = s->window + n;   /* both pointers start just past the dictionary */
+}
+
+
+/* Returns true if inflate is currently at the end of a block generated
+ * by Z_SYNC_FLUSH or Z_FULL_FLUSH. 
+ * IN assertion: s != Z_NULL
+ */
+int inflate_blocks_sync_point(s)
+inflate_blocks_statef *s;       /* must not be Z_NULL */
+{
+  return s->mode == LENS;       /* LENS: about to read a stored block's length word */
+}
diff --git a/ZLIB/infblock.h b/ZLIB/infblock.h
new file mode 100644
index 0000000..173b226
--- /dev/null
+++ b/ZLIB/infblock.h
@@ -0,0 +1,39 @@
+/* infblock.h -- header to use infblock.c
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_blocks_state;
+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
+
+extern inflate_blocks_statef * inflate_blocks_new OF((
+    z_streamp z,
+    check_func c,               /* check function */
+    uInt w));                   /* window size */
+
+extern int inflate_blocks OF((
+    inflate_blocks_statef *,
+    z_streamp ,
+    int));                      /* initial return code */
+
+extern void inflate_blocks_reset OF((
+    inflate_blocks_statef *,
+    z_streamp ,
+    uLongf *));                  /* check value on output */
+
+extern int inflate_blocks_free OF((
+    inflate_blocks_statef *,
+    z_streamp));
+
+extern void inflate_set_dictionary OF((
+    inflate_blocks_statef *s,
+    const Bytef *d,  /* dictionary */
+    uInt  n));       /* dictionary length */
+
+extern int inflate_blocks_sync_point OF((
+    inflate_blocks_statef *s));
diff --git a/ZLIB/infcodes.c b/ZLIB/infcodes.c
new file mode 100644
index 0000000..9abe541
--- /dev/null
+++ b/ZLIB/infcodes.c
@@ -0,0 +1,251 @@
+/* infcodes.c -- process literals and length/distance pairs
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "infblock.h"
+#include "infcodes.h"
+#include "infutil.h"
+#include "inffast.h"
+
+/* simplify the use of the inflate_huft type with some defines */
+#define exop word.what.Exop     /* t->exop reads t->word.what.Exop */
+#define bits word.what.Bits     /* t->bits reads t->word.what.Bits */
+
+typedef enum {        /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+      START,    /* x: set up for LEN (tries inflate_fast first unless SLOW) */
+      LEN,      /* i: get length/literal/eob next */
+      LENEXT,   /* i: getting length extra (have base) */
+      DIST,     /* i: get distance next */
+      DISTEXT,  /* i: getting distance extra */
+      COPY,     /* o: copying bytes in window, waiting for space */
+      LIT,      /* o: got literal, waiting for output space */
+      WASH,     /* o: got eob, possibly still output waiting */
+      END,      /* x: got eob and all data flushed */
+      BADCODE}  /* x: got error */
+inflate_codes_mode;
+
+/* inflate codes private state */
+struct inflate_codes_state {
+
+  /* mode */
+  inflate_codes_mode mode;      /* current inflate_codes mode */
+
+  /* mode dependent information */
+  uInt len;             /* match length; counts down to zero during COPY */
+  union {
+    struct {
+      inflate_huft *tree;       /* pointer into tree */
+      uInt need;                /* bits needed */
+    } code;             /* if LEN or DIST, where in tree */
+    uInt lit;           /* if LIT, literal */
+    struct {
+      uInt get;                 /* bits to get for extra */
+      uInt dist;                /* distance back to copy from */
+    } copy;             /* if EXT or COPY, where and how much */
+  } sub;                /* submode */
+
+  /* mode independent information */
+  Byte lbits;           /* ltree bits decoded per branch */
+  Byte dbits;           /* dtree bits decoded per branch */
+  inflate_huft *ltree;          /* literal/length/eob tree */
+  inflate_huft *dtree;          /* distance tree */
+
+};
+
+
+inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z)
+uInt bl, bd;                    /* bits decoded per branch for tl and td */
+inflate_huft *tl;               /* literal/length/eob tree */
+inflate_huft *td; /* need separate declaration for Borland C++ */
+z_streamp z;                    /* stream passed to ZALLOC */
+{
+  inflate_codes_statef *state;
+
+  state = (inflate_codes_statef *)
+          ZALLOC(z, 1, sizeof(struct inflate_codes_state));
+  if (state == Z_NULL)
+    return Z_NULL;              /* out of memory: nothing to initialise */
+  state->ltree = tl;            /* trees are borrowed, only the pointers are kept */
+  state->dtree = td;
+  state->lbits = (Byte)bl;
+  state->dbits = (Byte)bd;
+  state->mode = START;
+  Tracev((stderr, "inflate:       codes new\n"));
+  return state;
+}
+
+
+int inflate_codes(s, z, r)      /* decode literals and length/distance pairs of one block */
+inflate_blocks_statef *s;       /* block state; s->sub.decode.codes holds the codes state */
+z_streamp z;                    /* stream; z->msg is set on invalid codes */
+int r;                          /* initial return code */
+{
+  uInt j;               /* temporary storage */
+  inflate_huft *t;      /* temporary pointer */
+  uInt e;               /* extra bits or operation */
+  uLong b;              /* bit buffer */
+  uInt k;               /* bits in bit buffer */
+  Bytef *p;             /* input data pointer */
+  uInt n;               /* bytes available there */
+  Bytef *q;             /* output window write pointer */
+  uInt m;               /* bytes to end of window or read pointer */
+  Bytef *f;             /* pointer to copy strings from */
+  inflate_codes_statef *c = s->sub.decode.codes;  /* codes state */
+
+  /* copy input/output information to locals (UPDATE macro restores) */
+  LOAD
+
+  /* process input and output based on current state */
+  while (1) switch (c->mode)
+  {             /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+    case START:         /* x: set up for LEN */
+#ifndef SLOW
+      if (m >= 258 && n >= 10)  /* the minimum room inflate_fast is documented to need */
+      {
+        UPDATE
+        r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z);
+        LOAD
+        if (r != Z_OK)
+        {
+          c->mode = r == Z_STREAM_END ? WASH : BADCODE;
+          break;
+        }
+      }
+#endif /* !SLOW */
+      c->sub.code.need = c->lbits;
+      c->sub.code.tree = c->ltree;
+      c->mode = LEN;            /* falls through */
+    case LEN:           /* i: get length/literal/eob next */
+      j = c->sub.code.need;
+      NEEDBITS(j)
+      t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+      DUMPBITS(t->bits)
+      e = (uInt)(t->exop);
+      if (e == 0)               /* literal */
+      {
+        c->sub.lit = t->base;
+        Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+                 "inflate:         literal '%c'\n" :
+                 "inflate:         literal 0x%02x\n", t->base));
+        c->mode = LIT;
+        break;
+      }
+      if (e & 16)               /* length */
+      {
+        c->sub.copy.get = e & 15;       /* number of extra length bits */
+        c->len = t->base;
+        c->mode = LENEXT;
+        break;
+      }
+      if ((e & 64) == 0)        /* next table */
+      {
+        c->sub.code.need = e;
+        c->sub.code.tree = t + t->base;
+        break;                  /* stays in LEN and looks up the sub-table */
+      }
+      if (e & 32)               /* end of block */
+      {
+        Tracevv((stderr, "inflate:         end of block\n"));
+        c->mode = WASH;
+        break;
+      }
+      c->mode = BADCODE;        /* invalid code */
+      z->msg = (char*)"invalid literal/length code";
+      r = Z_DATA_ERROR;
+      LEAVE
+    case LENEXT:        /* i: getting length extra (have base) */
+      j = c->sub.copy.get;
+      NEEDBITS(j)
+      c->len += (uInt)b & inflate_mask[j];
+      DUMPBITS(j)
+      c->sub.code.need = c->dbits;
+      c->sub.code.tree = c->dtree;
+      Tracevv((stderr, "inflate:         length %u\n", c->len));
+      c->mode = DIST;           /* falls through */
+    case DIST:          /* i: get distance next */
+      j = c->sub.code.need;
+      NEEDBITS(j)
+      t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+      DUMPBITS(t->bits)
+      e = (uInt)(t->exop);
+      if (e & 16)               /* distance */
+      {
+        c->sub.copy.get = e & 15;       /* number of extra distance bits */
+        c->sub.copy.dist = t->base;
+        c->mode = DISTEXT;
+        break;
+      }
+      if ((e & 64) == 0)        /* next table */
+      {
+        c->sub.code.need = e;
+        c->sub.code.tree = t + t->base;
+        break;
+      }
+      c->mode = BADCODE;        /* invalid code */
+      z->msg = (char*)"invalid distance code";
+      r = Z_DATA_ERROR;
+      LEAVE
+    case DISTEXT:       /* i: getting distance extra */
+      j = c->sub.copy.get;
+      NEEDBITS(j)
+      c->sub.copy.dist += (uInt)b & inflate_mask[j];
+      DUMPBITS(j)
+      Tracevv((stderr, "inflate:         distance %u\n", c->sub.copy.dist));
+      c->mode = COPY;           /* falls through */
+    case COPY:          /* o: copying bytes in window, waiting for space */
+      f = q - c->sub.copy.dist;
+      while (f < s->window)             /* modulo window size-"while" instead */
+        f += s->end - s->window;        /* of "if" handles invalid distances */
+      while (c->len)
+      {
+        NEEDOUT
+        OUTBYTE(*f++)
+        if (f == s->end)                /* source wraps like the window does */
+          f = s->window;
+        c->len--;
+      }
+      c->mode = START;
+      break;
+    case LIT:           /* o: got literal, waiting for output space */
+      NEEDOUT
+      OUTBYTE(c->sub.lit)
+      c->mode = START;
+      break;
+    case WASH:          /* o: got eob, possibly more output */
+      if (k > 7)        /* return unused byte, if any */
+      {
+        Assert(k < 16, "inflate_codes grabbed too many bytes")
+        k -= 8;
+        n++;
+        p--;            /* can always return one */
+      }
+      FLUSH
+      if (s->read != s->write)
+        LEAVE
+      c->mode = END;            /* falls through */
+    case END:
+      r = Z_STREAM_END;
+      LEAVE
+    case BADCODE:       /* x: got error */
+      r = Z_DATA_ERROR;
+      LEAVE
+    default:
+      r = Z_STREAM_ERROR;
+      LEAVE
+  }
+#ifdef NEED_DUMMY_RETURN
+  return Z_STREAM_ERROR;  /* Some dumb compilers complain without this */
+#endif
+}
+
+
+void inflate_codes_free(c, z)   /* release a codes state */
+inflate_codes_statef *c;        /* state to free; the trees it points at are left alone */
+z_streamp z;                    /* stream passed to ZFREE */
+{
+  ZFREE(z, c);
+  Tracev((stderr, "inflate:       codes free\n"));
+}
diff --git a/ZLIB/infcodes.h b/ZLIB/infcodes.h
new file mode 100644
index 0000000..46821a0
--- /dev/null
+++ b/ZLIB/infcodes.h
@@ -0,0 +1,27 @@
+/* infcodes.h -- header to use infcodes.c
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_codes_state;
+typedef struct inflate_codes_state FAR inflate_codes_statef;
+
+extern inflate_codes_statef *inflate_codes_new OF((
+    uInt, uInt,
+    inflate_huft *, inflate_huft *,
+    z_streamp ));
+
+extern int inflate_codes OF((
+    inflate_blocks_statef *,
+    z_streamp ,
+    int));
+
+extern void inflate_codes_free OF((
+    inflate_codes_statef *,
+    z_streamp ));
+
diff --git a/ZLIB/inffast.c b/ZLIB/inffast.c
new file mode 100644
index 0000000..aa7f1d4
--- /dev/null
+++ b/ZLIB/inffast.c
@@ -0,0 +1,183 @@
+/* inffast.c -- process literals and length/distance pairs fast
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "infblock.h"
+#include "infcodes.h"
+#include "infutil.h"
+#include "inffast.h"
+
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define exop word.what.Exop     /* t->exop reads t->word.what.Exop */
+#define bits word.what.Bits     /* t->bits reads t->word.what.Bits */
+
+/* macros for bit input with no checking and for returning unused bytes */
+#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;}   /* gives back min(k>>3, z->avail_in-n) bytes; clobbers c */
+
+/* Called with number of bytes left to write in window at least 258
+   (the maximum string length) and number of input bytes available
+   at least ten.  The ten bytes are six bytes for the longest length/
+   distance pair plus four bytes for overloading the bit buffer. */
+
+int inflate_fast(bl, bd, tl, td, s, z)  /* returns Z_OK, Z_STREAM_END or Z_DATA_ERROR */
+uInt bl, bd;                    /* index bits for the first lookup in tl and td */
+inflate_huft *tl;               /* literal/length tree */
+inflate_huft *td; /* need separate declaration for Borland C++ */
+inflate_blocks_statef *s;       /* block state (window bounds, bit buffer) */
+z_streamp z;                    /* stream; z->msg is set on invalid codes */
+{
+  inflate_huft *t;      /* temporary pointer */
+  uInt e;               /* extra bits or operation */
+  uLong b;              /* bit buffer */
+  uInt k;               /* bits in bit buffer */
+  Bytef *p;             /* input data pointer */
+  uInt n;               /* bytes available there */
+  Bytef *q;             /* output window write pointer */
+  uInt m;               /* bytes to end of window or read pointer */
+  uInt ml;              /* mask for literal/length tree */
+  uInt md;              /* mask for distance tree */
+  uInt c;               /* bytes to copy */
+  uInt d;               /* distance back to copy from */
+  Bytef *r;             /* copy source pointer */
+
+  /* load input, output, bit values */
+  LOAD
+
+  /* initialize masks */
+  ml = inflate_mask[bl];
+  md = inflate_mask[bd];
+
+  /* do until not enough input or output space for fast loop */
+  do {                          /* assume called with m >= 258 && n >= 10 */
+    /* get literal/length code */
+    GRABBITS(20)                /* max bits for literal/length code */
+    if ((e = (t = tl + ((uInt)b & ml))->exop) == 0)     /* exop 0: literal */
+    {
+      DUMPBITS(t->bits)
+      Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+                "inflate:         * literal '%c'\n" :
+                "inflate:         * literal 0x%02x\n", t->base));
+      *q++ = (Byte)t->base;
+      m--;
+      continue;
+    }
+    do {
+      DUMPBITS(t->bits)
+      if (e & 16)               /* length code */
+      {
+        /* get extra bits for length */
+        e &= 15;
+        c = t->base + ((uInt)b & inflate_mask[e]);
+        DUMPBITS(e)
+        Tracevv((stderr, "inflate:         * length %u\n", c));
+
+        /* decode distance base of block to copy */
+        GRABBITS(15);           /* max bits for distance code */
+        e = (t = td + ((uInt)b & md))->exop;
+        do {
+          DUMPBITS(t->bits)
+          if (e & 16)
+          {
+            /* get extra bits to add to distance base */
+            e &= 15;
+            GRABBITS(e)         /* get extra bits (up to 13) */
+            d = t->base + ((uInt)b & inflate_mask[e]);
+            DUMPBITS(e)
+            Tracevv((stderr, "inflate:         * distance %u\n", d));
+
+            /* do the copy */
+            m -= c;             /* output room shrinks by the match length */
+            r = q - d;
+            if (r < s->window)                  /* wrap if needed */
+            {
+              do {
+                r += s->end - s->window;        /* force pointer in window */
+              } while (r < s->window);          /* covers invalid distances */
+              e = s->end - r;   /* bytes available before the source reaches s->end */
+              if (c > e)
+              {
+                c -= e;                         /* wrapped copy */
+                do {
+                    *q++ = *r++;
+                } while (--e);
+                r = s->window;
+                do {
+                    *q++ = *r++;
+                } while (--c);
+              }
+              else                              /* normal copy */
+              {
+                *q++ = *r++;  c--;      /* two unrolled copies, then the loop: */
+                *q++ = *r++;  c--;      /* assumes c >= 3 (the minimum match length) */
+                do {
+                    *q++ = *r++;
+                } while (--c);
+              }
+            }
+            else                                /* normal copy */
+            {
+              *q++ = *r++;  c--;
+              *q++ = *r++;  c--;
+              do {
+                *q++ = *r++;
+              } while (--c);
+            }
+            break;
+          }
+          else if ((e & 64) == 0)       /* link to a distance sub-table */
+          {
+            t += t->base;
+            e = (t += ((uInt)b & inflate_mask[e]))->exop;
+          }
+          else
+          {
+            z->msg = (char*)"invalid distance code";
+            UNGRAB
+            UPDATE
+            return Z_DATA_ERROR;
+          }
+        } while (1);
+        break;
+      }
+      if ((e & 64) == 0)        /* link to a literal/length sub-table */
+      {
+        t += t->base;
+        if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0)
+        {
+          DUMPBITS(t->bits)
+          Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
+                    "inflate:         * literal '%c'\n" :
+                    "inflate:         * literal 0x%02x\n", t->base));
+          *q++ = (Byte)t->base;
+          m--;
+          break;
+        }
+      }
+      else if (e & 32)          /* end-of-block code */
+      {
+        Tracevv((stderr, "inflate:         * end of block\n"));
+        UNGRAB
+        UPDATE
+        return Z_STREAM_END;
+      }
+      else
+      {
+        z->msg = (char*)"invalid literal/length code";
+        UNGRAB
+        UPDATE
+        return Z_DATA_ERROR;
+      }
+    } while (1);
+  } while (m >= 258 && n >= 10);
+
+  /* not enough input or output--restore pointers and return */
+  UNGRAB
+  UPDATE
+  return Z_OK;
+}
diff --git a/ZLIB/inffast.h b/ZLIB/inffast.h
new file mode 100644
index 0000000..a31a4bb
--- /dev/null
+++ b/ZLIB/inffast.h
@@ -0,0 +1,17 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+extern int inflate_fast OF((
+    uInt,
+    uInt,
+    inflate_huft *,
+    inflate_huft *,
+    inflate_blocks_statef *,
+    z_streamp ));
diff --git a/ZLIB/inffixed.h b/ZLIB/inffixed.h
new file mode 100644
index 0000000..77f7e76
--- /dev/null
+++ b/ZLIB/inffixed.h
@@ -0,0 +1,151 @@
+/* inffixed.h -- table for decoding fixed codes
+ * Generated automatically by the maketree.c program
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+local uInt fixed_bl = 9;         /* bit depth reported for fixed_tl (512 entries == 1 << 9) */
+local uInt fixed_bd = 5;         /* bit depth reported for fixed_td (32 entries == 1 << 5) */
+local inflate_huft fixed_tl[] = {
+    {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
+    {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192},
+    {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160},
+    {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224},
+    {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144},
+    {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208},
+    {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176},
+    {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240},
+    {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
+    {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200},
+    {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168},
+    {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232},
+    {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152},
+    {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216},
+    {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184},
+    {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248},
+    {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
+    {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196},
+    {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164},
+    {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228},
+    {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148},
+    {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212},
+    {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180},
+    {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244},
+    {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
+    {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204},
+    {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172},
+    {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236},
+    {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156},
+    {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220},
+    {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188},
+    {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},252},
+    {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
+    {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194},
+    {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162},
+    {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226},
+    {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146},
+    {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210},
+    {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178},
+    {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242},
+    {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
+    {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202},
+    {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170},
+    {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234},
+    {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154},
+    {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218},
+    {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186},
+    {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250},
+    {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
+    {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198},
+    {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166},
+    {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230},
+    {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150},
+    {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214},
+    {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182},
+    {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246},
+    {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
+    {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206},
+    {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174},
+    {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238},
+    {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158},
+    {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222},
+    {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190},
+    {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},254},
+    {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
+    {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193},
+    {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161},
+    {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225},
+    {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145},
+    {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209},
+    {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177},
+    {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241},
+    {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
+    {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201},
+    {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169},
+    {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233},
+    {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153},
+    {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217},
+    {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185},
+    {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249},
+    {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
+    {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197},
+    {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165},
+    {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229},
+    {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149},
+    {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213},
+    {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181},
+    {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245},
+    {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
+    {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205},
+    {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173},
+    {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237},
+    {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157},
+    {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221},
+    {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189},
+    {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253},
+    {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
+    {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},195},
+    {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163},
+    {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227},
+    {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147},
+    {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211},
+    {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179},
+    {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243},
+    {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
+    {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203},
+    {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171},
+    {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235},
+    {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155},
+    {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219},
+    {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187},
+    {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251},
+    {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
+    {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199},
+    {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167},
+    {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231},
+    {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151},
+    {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215},
+    {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183},
+    {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247},
+    {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
+    {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207},
+    {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175},
+    {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239},
+    {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159},
+    {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223},
+    {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191},
+    {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255}
+  };
+local inflate_huft fixed_td[] = {
+    {{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097},
+    {{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, {{{93,5}},16385},
+    {{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193},
+    {{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577},
+    {{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145},
+    {{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577},
+    {{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289},
+    {{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577}
+  };
diff --git a/ZLIB/inflate.c b/ZLIB/inflate.c
new file mode 100644
index 0000000..dfb2e86
--- /dev/null
+++ b/ZLIB/inflate.c
@@ -0,0 +1,366 @@
+/* inflate.c -- zlib interface to inflate modules
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "infblock.h"
+
+struct inflate_blocks_state {int dummy;}; /* dummy body for buggy compilers; this file only passes pointers to it */
+
+typedef enum {         /* values of internal_state.mode, switched on by inflate() */
+      METHOD,   /* waiting for method byte */
+      FLAG,     /* waiting for flag byte */
+      DICT4,    /* four dictionary check bytes to go */
+      DICT3,    /* three dictionary check bytes to go */
+      DICT2,    /* two dictionary check bytes to go */
+      DICT1,    /* one dictionary check byte to go */
+      DICT0,    /* waiting for inflateSetDictionary */
+      BLOCKS,   /* decompressing blocks */
+      CHECK4,   /* four check bytes to go */
+      CHECK3,   /* three check bytes to go */
+      CHECK2,   /* two check bytes to go */
+      CHECK1,   /* one check byte to go */
+      DONE,     /* finished check, done (inflate() returns Z_STREAM_END) */
+      BAD}      /* got an error--stay here (inflate() returns Z_DATA_ERROR) */
+inflate_mode;
+
+/* inflate private state */
+struct internal_state {
+
+  /* mode */
+  inflate_mode  mode;   /* current inflate mode */
+
+  /* mode dependent information */
+  union {
+    uInt method;        /* if FLAG, method byte (stored while in METHOD) */
+    struct {
+      uLong was;                /* computed check value */
+      uLong need;               /* stream check value (dictionary id in DICTn) */
+    } check;            /* if CHECK, check values to compare */
+    uInt marker;        /* if BAD, inflateSync's marker bytes count */
+  } sub;        /* submode */
+
+  /* mode independent information */
+  int  nowrap;          /* flag for no wrapper */
+  uInt wbits;           /* log2(window size)  (8..15, defaults to 15) */
+  inflate_blocks_statef 
+    *blocks;            /* current inflate_blocks state */
+
+};
+
+
+int ZEXPORT inflateReset(z)      /* rewind an allocated stream; Z_OK or Z_STREAM_ERROR */
+z_streamp z;                     /* stream to reset; z and z->state must be non-null */
+{
+  if (z == Z_NULL || z->state == Z_NULL)
+    return Z_STREAM_ERROR;
+  z->total_in = z->total_out = 0;
+  z->msg = Z_NULL;
+  z->state->mode = z->state->nowrap ? BLOCKS : METHOD;  /* nowrap streams have no header to read */
+  inflate_blocks_reset(z->state->blocks, z, Z_NULL);    /* Z_NULL: no check value requested */
+  Tracev((stderr, "inflate: reset\n"));
+  return Z_OK;
+}
+
+
+int ZEXPORT inflateEnd(z)        /* free the inflate state; Z_OK or Z_STREAM_ERROR */
+z_streamp z;                     /* stream to tear down; needs state and zfree set */
+{
+  if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL)
+    return Z_STREAM_ERROR;
+  if (z->state->blocks != Z_NULL)       /* still Z_NULL when inflateInit2_ bails out early */
+    inflate_blocks_free(z->state->blocks, z);
+  ZFREE(z, z->state);
+  z->state = Z_NULL;                    /* a repeated call now returns Z_STREAM_ERROR */
+  Tracev((stderr, "inflate: end\n"));
+  return Z_OK;
+}
+
+
+int ZEXPORT inflateInit2_(z, w, version, stream_size)  /* allocate and reset inflate state */
+z_streamp z;                     /* stream to initialize */
+int w;                           /* window bits, 8..15; a negative value selects nowrap */
+const char *version;             /* caller's version; first character must match ZLIB_VERSION */
+int stream_size;                 /* caller's sizeof(z_stream) */
+{
+  if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+      stream_size != sizeof(z_stream))
+      return Z_VERSION_ERROR;
+
+  /* initialize state */
+  if (z == Z_NULL)
+    return Z_STREAM_ERROR;
+  z->msg = Z_NULL;
+  if (z->zalloc == Z_NULL)               /* fall back to the library allocator */
+  {
+    z->zalloc = zcalloc;
+    z->opaque = (voidpf)0;
+  }
+  if (z->zfree == Z_NULL) z->zfree = zcfree;
+  if ((z->state = (struct internal_state FAR *)
+       ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL)
+    return Z_MEM_ERROR;
+  z->state->blocks = Z_NULL;             /* lets inflateEnd run safely on the error paths below */
+
+  /* handle undocumented nowrap option (no zlib header or check) */
+  z->state->nowrap = 0;
+  if (w < 0)
+  {
+    w = - w;
+    z->state->nowrap = 1;
+  }
+
+  /* set window size */
+  if (w < 8 || w > 15)
+  {
+    inflateEnd(z);                       /* releases the state allocated above */
+    return Z_STREAM_ERROR;
+  }
+  z->state->wbits = (uInt)w;
+
+  /* create inflate_blocks state */
+  if ((z->state->blocks =
+      inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w))
+      == Z_NULL)
+  {
+    inflateEnd(z);
+    return Z_MEM_ERROR;
+  }
+  Tracev((stderr, "inflate: allocated\n"));
+
+  /* reset state */
+  inflateReset(z);
+  return Z_OK;
+}
+
+
+int ZEXPORT inflateInit_(z, version, stream_size)      /* inflateInit2_ with DEF_WBITS */
+z_streamp z;                     /* stream to initialize */
+const char *version;             /* passed through to inflateInit2_ */
+int stream_size;                 /* passed through to inflateInit2_ */
+{
+  return inflateInit2_(z, DEF_WBITS, version, stream_size);
+}
+
+
+#define NEEDBYTE {if(z->avail_in==0)return r;r=f;}  /* no input: return r; otherwise set r = f */
+#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)  /* consume one input byte, updating the counters */
+
+int ZEXPORT inflate(z, f)        /* advance the decoder state machine over the available input */
+z_streamp z;                     /* stream; z, z->state and z->next_in must be non-null */
+int f;                           /* flush mode; only Z_FINISH is distinguished here */
+{
+  int r;                         /* value returned when input runs out (see NEEDBYTE) */
+  uInt b;                        /* flag byte of the zlib header */
+
+  if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL)
+    return Z_STREAM_ERROR;
+  f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK;       /* f becomes the result used once input was consumed */
+  r = Z_BUF_ERROR;                              /* result if no byte can be consumed at all */
+  while (1) switch (z->state->mode)
+  {
+    case METHOD:
+      NEEDBYTE
+      if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED)
+      {
+        z->state->mode = BAD;
+        z->msg = (char*)"unknown compression method";
+        z->state->sub.marker = 5;       /* can't try inflateSync */
+        break;
+      }
+      if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
+      {
+        z->state->mode = BAD;
+        z->msg = (char*)"invalid window size";
+        z->state->sub.marker = 5;       /* can't try inflateSync */
+        break;
+      }
+      z->state->mode = FLAG;            /* fall through */
+    case FLAG:
+      NEEDBYTE
+      b = NEXTBYTE;
+      if (((z->state->sub.method << 8) + b) % 31)       /* 16-bit header must be a multiple of 31 */
+      {
+        z->state->mode = BAD;
+        z->msg = (char*)"incorrect header check";
+        z->state->sub.marker = 5;       /* can't try inflateSync */
+        break;
+      }
+      Tracev((stderr, "inflate: zlib header ok\n"));
+      if (!(b & PRESET_DICT))
+      {
+        z->state->mode = BLOCKS;
+        break;
+      }
+      z->state->mode = DICT4;           /* fall through */
+    case DICT4:
+      NEEDBYTE
+      z->state->sub.check.need = (uLong)NEXTBYTE << 24;  /* dictionary id, most significant byte first */
+      z->state->mode = DICT3;           /* fall through */
+    case DICT3:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE << 16;
+      z->state->mode = DICT2;           /* fall through */
+    case DICT2:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE << 8;
+      z->state->mode = DICT1;           /* fall through */
+    case DICT1:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE;
+      z->adler = z->state->sub.check.need;      /* inflateSetDictionary compares against this */
+      z->state->mode = DICT0;
+      return Z_NEED_DICT;
+    case DICT0:                         /* inflate() called again before inflateSetDictionary */
+      z->state->mode = BAD;
+      z->msg = (char*)"need dictionary";
+      z->state->sub.marker = 0;       /* can try inflateSync */
+      return Z_STREAM_ERROR;
+    case BLOCKS:
+      r = inflate_blocks(z->state->blocks, z, r);
+      if (r == Z_DATA_ERROR)
+      {
+        z->state->mode = BAD;
+        z->state->sub.marker = 0;       /* can try inflateSync */
+        break;
+      }
+      if (r == Z_OK)
+        r = f;
+      if (r != Z_STREAM_END)
+        return r;
+      r = f;
+      inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);  /* stores the computed check */
+      if (z->state->nowrap)
+      {
+        z->state->mode = DONE;
+        break;
+      }
+      z->state->mode = CHECK4;          /* fall through */
+    case CHECK4:
+      NEEDBYTE
+      z->state->sub.check.need = (uLong)NEXTBYTE << 24;  /* stored check value, most significant byte first */
+      z->state->mode = CHECK3;          /* fall through */
+    case CHECK3:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE << 16;
+      z->state->mode = CHECK2;          /* fall through */
+    case CHECK2:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE << 8;
+      z->state->mode = CHECK1;          /* fall through */
+    case CHECK1:
+      NEEDBYTE
+      z->state->sub.check.need += (uLong)NEXTBYTE;
+
+      if (z->state->sub.check.was != z->state->sub.check.need)
+      {
+        z->state->mode = BAD;
+        z->msg = (char*)"incorrect data check";
+        z->state->sub.marker = 5;       /* can't try inflateSync */
+        break;
+      }
+      Tracev((stderr, "inflate: zlib check ok\n"));
+      z->state->mode = DONE;            /* fall through */
+    case DONE:
+      return Z_STREAM_END;
+    case BAD:
+      return Z_DATA_ERROR;
+    default:
+      return Z_STREAM_ERROR;
+  }
+#ifdef NEED_DUMMY_RETURN
+  return Z_STREAM_ERROR;  /* Some dumb compilers complain without this */
+#endif
+}
+
+
+int ZEXPORT inflateSetDictionary(z, dictionary, dictLength)    /* supply the dictionary after Z_NEED_DICT */
+z_streamp z;                     /* stream that must currently be in mode DICT0 */
+const Bytef *dictionary;         /* dictionary bytes */
+uInt  dictLength;                /* number of bytes at dictionary */
+{
+  uInt length = dictLength;      /* bytes actually handed to the blocks state */
+
+  if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0)
+    return Z_STREAM_ERROR;
+
+  if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR;  /* z->adler is the id read by inflate() */
+  z->adler = 1L;
+
+  if (length >= ((uInt)1<<z->state->wbits))
+  {
+    length = (1<<z->state->wbits)-1;    /* keep only the trailing (window size - 1) bytes */
+    dictionary += dictLength - length;
+  }
+  inflate_set_dictionary(z->state->blocks, dictionary, length);
+  z->state->mode = BLOCKS;
+  return Z_OK;
+}
+
+
+int ZEXPORT inflateSync(z)       /* skip input up to and including a 00 00 ff ff marker */
+z_streamp z;                     /* stream to resynchronize */
+{
+  uInt n;       /* number of bytes to look at */
+  Bytef *p;     /* pointer to bytes */
+  uInt m;       /* number of marker bytes found in a row */
+  uLong r, w;   /* temporaries to save total_in and total_out */
+
+  /* set up */
+  if (z == Z_NULL || z->state == Z_NULL)
+    return Z_STREAM_ERROR;
+  if (z->state->mode != BAD)
+  {
+    z->state->mode = BAD;
+    z->state->sub.marker = 0;
+  }
+  if ((n = z->avail_in) == 0)
+    return Z_BUF_ERROR;
+  p = z->next_in;
+  m = z->state->sub.marker;     /* resume a partial match; 5 skips the search and fails below */
+
+  /* search */
+  while (n && m < 4)
+  {
+    static const Byte mark[4] = {0, 0, 0xff, 0xff};
+    if (*p == mark[m])
+      m++;
+    else if (*p)
+      m = 0;
+    else
+      m = 4 - m;                /* a zero where 0xff was expected: m 2 stays 2, m 3 becomes 1 */
+    p++, n--;
+  }
+
+  /* restore */
+  z->total_in += p - z->next_in;
+  z->next_in = p;
+  z->avail_in = n;
+  z->state->sub.marker = m;
+
+  /* return no joy or set up to restart on a new block */
+  if (m != 4)
+    return Z_DATA_ERROR;
+  r = z->total_in;  w = z->total_out;   /* inflateReset zeroes both totals */
+  inflateReset(z);
+  z->total_in = r;  z->total_out = w;
+  z->state->mode = BLOCKS;              /* overrides the mode chosen by inflateReset */
+  return Z_OK;
+}
+
+
+/* Returns true if inflate is currently at the end of a block generated
+ * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
+ * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH
+ * but removes the length bytes of the resulting empty stored block. When
+ * decompressing, PPP checks that at the end of input packet, inflate is
+ * waiting for these length bytes.
+ */
+int ZEXPORT inflateSyncPoint(z)
+z_streamp z;                     /* stream to query; needs state and blocks allocated */
+{
+  if (z == Z_NULL || z->state == Z_NULL || z->state->blocks == Z_NULL)
+    return Z_STREAM_ERROR;
+  return inflate_blocks_sync_point(z->state->blocks);   /* result is returned unchanged */
+}
diff --git a/ZLIB/inftrees.c b/ZLIB/inftrees.c
new file mode 100644
index 0000000..4c32ca3
--- /dev/null
+++ b/ZLIB/inftrees.c
@@ -0,0 +1,454 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+
+#if !defined(BUILDFIXED) && !defined(STDC)
+#  define BUILDFIXED   /* non ANSI compilers may not accept inffixed.h */
+#endif
+
+const char inflate_copyright[] =
+   " inflate 1.1.4 Copyright 1995-2002 Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+struct internal_state  {int dummy;}; /* dummy body for buggy compilers; this file never looks inside it */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define exop word.what.Exop      /* shorthand for the Exop byte of an inflate_huft entry */
+#define bits word.what.Bits      /* shorthand for the Bits byte of an inflate_huft entry */
+
+
+local int huft_build OF((
+    uIntf *,            /* code lengths in bits */
+    uInt,               /* number of codes */
+    uInt,               /* number of "simple" codes */
+    const uIntf *,      /* list of base values for non-simple codes */
+    const uIntf *,      /* list of extra bits for non-simple codes */
+    inflate_huft * FAR*,/* result: starting table */
+    uIntf *,            /* maximum lookup bits (returns actual) */
+    inflate_huft *,     /* space for trees */
+    uInt *,             /* hufts used in space */
+    uIntf * ));         /* space for values */
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+        /* code 285 is length 258 with no extra bits (the "note #13" once cited here is not in this file) */
+local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */
+        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid (the two entries past code 285) */
+local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577};
+local const uInt cpdext[30] = { /* Extra bits for distance codes */
+        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+        12, 12, 13, 13};
+
+/*
+   Huffman code decoding is performed using a multi-level table lookup.
+   The fastest way to decode is to simply build a lookup table whose
+   size is determined by the longest code.  However, the time it takes
+   to build this table can also be a factor if the data being decoded
+   is not very long.  The most common codes are necessarily the
+   shortest codes, so those codes dominate the decoding time, and hence
+   the speed.  The idea is you can have a shorter table that decodes the
+   shorter, more probable codes, and then point to subsidiary tables for
+   the longer codes.  The time it costs to decode the longer codes is
+   then traded against the time it takes to make longer tables.
+
+   The results of this trade are in the variables lbits and dbits
+   below.  lbits is the number of bits the first level table for literal/
+   length codes can decode in one step, and dbits is the same thing for
+   the distance codes.  Subsequent tables are also less than or equal to
+   those sizes.  These values may be adjusted either when all of the
+   codes are shorter than that, in which case the longest code length in
+   bits is used, or when the shortest code is *longer* than the requested
+   table size, in which case the length of the shortest code in bits is
+   used.
+
+   There are two different values for the two tables, since they code a
+   different number of possibilities each.  The literal/length table
+   codes 286 possible values, or in a flat code, a little over eight
+   bits.  The distance table codes 30 possible values, or a little less
+   than five bits, flat.  The optimum values for speed end up being
+   about one bit more than those, so lbits is 8+1 and dbits is 5+1.
+   The optimum values may differ though from machine to machine, and
+   possibly even between compilers.  Your mileage may vary.
+ */
+
+
+/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */
+#define BMAX 15         /* maximum bit length of any code */
+
+local int huft_build(b, n, s, d, e, t, m, hp, hn, v)
+uIntf *b;               /* code lengths in bits (all assumed <= BMAX) */
+uInt n;                 /* number of codes (assumed <= 288; the counting loop needs n >= 1) */
+uInt s;                 /* number of simple-valued codes (0..s-1) */
+const uIntf *d;         /* list of base values for non-simple codes */
+const uIntf *e;         /* list of extra bits for non-simple codes */
+inflate_huft * FAR *t;  /* result: starting table */
+uIntf *m;               /* maximum lookup bits, returns actual */
+inflate_huft *hp;       /* space for trees */
+uInt *hn;               /* hufts used in space */
+uIntf *v;               /* working area: values in order of bit length */
+/* Given a list of code lengths and a maximum table size, make a set of
+   tables to decode that set of codes.  Return Z_OK on success, Z_BUF_ERROR
+   if the given code set is incomplete (the tables are still built in this
+   case), or Z_DATA_ERROR if it is over-subscribed or needs > MANY entries. */
+{
+
+  uInt a;                       /* counter for codes of length k */
+  uInt c[BMAX+1];               /* bit length count table */
+  uInt f;                       /* i repeats in table every f entries */
+  int g;                        /* maximum code length */
+  int h;                        /* table level */
+  register uInt i;              /* counter, current code */
+  register uInt j;              /* counter */
+  register int k;               /* number of bits in current code */
+  int l;                        /* bits per table (returned in m) */
+  uInt mask;                    /* (1 << w) - 1, to avoid cc -O bug on HP */
+  register uIntf *p;            /* pointer into c[], b[], or v[] */
+  inflate_huft *q;              /* points to current table */
+  struct inflate_huft_s r;      /* table entry for structure assignment */
+  inflate_huft *u[BMAX];        /* table stack */
+  register int w;               /* bits before this table == (l * h) */
+  uInt x[BMAX+1];               /* bit offsets, then code stack */
+  uIntf *xp;                    /* pointer into x */
+  int y;                        /* number of dummy codes added */
+  uInt z;                       /* number of entries in current table */
+
+
+  /* Generate counts for each bit length */
+  p = c;
+#define C0 *p++ = 0;
+#define C2 C0 C0 C0 C0
+#define C4 C2 C2 C2 C2
+  C4                            /* clear c[]--assume BMAX+1 is 16 */
+  p = b;  i = n;
+  do {
+    c[*p++]++;                  /* assume all entries <= BMAX */
+  } while (--i);                /* NOTE(review): body runs before the test, so n == 0 would wrap i */
+  if (c[0] == n)                /* null input--all zero length codes */
+  {
+    *t = (inflate_huft *)Z_NULL;
+    *m = 0;
+    return Z_OK;
+  }
+
+
+  /* Find minimum and maximum length, bound *m by those */
+  l = *m;
+  for (j = 1; j <= BMAX; j++)
+    if (c[j])
+      break;
+  k = j;                        /* minimum code length */
+  if ((uInt)l < j)
+    l = j;
+  for (i = BMAX; i; i--)
+    if (c[i])
+      break;
+  g = i;                        /* maximum code length */
+  if ((uInt)l > i)
+    l = i;
+  *m = l;
+
+
+  /* Adjust last length count to fill out codes, if needed */
+  for (y = 1 << j; j < i; j++, y <<= 1)         /* y: code patterns still unused at length j */
+    if ((y -= c[j]) < 0)
+      return Z_DATA_ERROR;                      /* more codes than patterns: over-subscribed */
+  if ((y -= c[i]) < 0)
+    return Z_DATA_ERROR;
+  c[i] += y;
+
+
+  /* Generate starting offsets into the value table for each length */
+  x[1] = j = 0;
+  p = c + 1;  xp = x + 2;
+  while (--i) {                 /* note that i == g from above */
+    *xp++ = (j += *p++);
+  }
+
+
+  /* Make a table of values in order of bit lengths */
+  p = b;  i = 0;
+  do {
+    if ((j = *p++) != 0)
+      v[x[j]++] = i;
+  } while (++i < n);
+  n = x[g];                     /* set n to length of v */
+
+
+  /* Generate the Huffman codes and for each, make the table entries */
+  x[0] = i = 0;                 /* first Huffman code is zero */
+  p = v;                        /* grab values in bit order */
+  h = -1;                       /* no tables yet--level -1 */
+  w = -l;                       /* bits decoded == (l * h) */
+  u[0] = (inflate_huft *)Z_NULL;        /* just to keep compilers happy */
+  q = (inflate_huft *)Z_NULL;   /* ditto */
+  z = 0;                        /* ditto */
+
+  /* go through the bit lengths (k already is bits in shortest code) */
+  for (; k <= g; k++)
+  {
+    a = c[k];
+    while (a--)
+    {
+      /* here i is the Huffman code of length k bits for value *p */
+      /* make tables up to required level */
+      while (k > w + l)
+      {
+        h++;
+        w += l;                 /* previous table always l bits */
+
+        /* compute minimum size table less than or equal to l bits */
+        z = g - w;
+        z = z > (uInt)l ? l : z;        /* table size upper limit */
+        if ((f = 1 << (j = k - w)) > a + 1)     /* try a k-w bit table */
+        {                       /* too few codes for k-w bit table */
+          f -= a + 1;           /* deduct codes from patterns left */
+          xp = c + k;
+          if (j < z)
+            while (++j < z)     /* try smaller tables up to z bits */
+            {
+              if ((f <<= 1) <= *++xp)
+                break;          /* enough codes to use up j bits */
+              f -= *xp;         /* else deduct codes from patterns */
+            }
+        }
+        z = 1 << j;             /* table entries for j-bit table */
+
+        /* allocate new table */
+        if (*hn + z > MANY)     /* (note: doesn't matter for fixed) */
+          return Z_DATA_ERROR;  /* overflow of MANY */
+        u[h] = q = hp + *hn;
+        *hn += z;
+
+        /* connect to last table, if there is one */
+        if (h)
+        {
+          x[h] = i;             /* save pattern for backing up */
+          r.bits = (Byte)l;     /* bits to dump before this table */
+          r.exop = (Byte)j;     /* bits in this table */
+          j = i >> (w - l);
+          r.base = (uInt)(q - u[h-1] - j);   /* offset to this table */
+          u[h-1][j] = r;        /* connect to last table */
+        }
+        else
+          *t = q;               /* first table is returned result */
+      }
+
+      /* set up table entry in r */
+      r.bits = (Byte)(k - w);
+      if (p >= v + n)
+        r.exop = 128 + 64;      /* out of values--invalid code */
+      else if (*p < s)
+      {
+        r.exop = (Byte)(*p < 256 ? 0 : 32 + 64);     /* 256 is end-of-block */
+        r.base = *p++;          /* simple code is just the value */
+      }
+      else
+      {
+        r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
+        r.base = d[*p++ - s];
+      }
+
+      /* fill code-like entries with r */
+      f = 1 << (k - w);
+      for (j = i >> w; j < z; j += f)
+        q[j] = r;
+
+      /* backwards increment the k-bit code i */
+      for (j = 1 << (k - 1); i & j; j >>= 1)
+        i ^= j;
+      i ^= j;
+
+      /* backup over finished tables */
+      mask = (1 << w) - 1;      /* needed on HP, cc -O bug */
+      while ((i & mask) != x[h])
+      {
+        h--;                    /* don't need to update q */
+        w -= l;
+        mask = (1 << w) - 1;
+      }
+    }
+  }
+
+
+  /* Return Z_BUF_ERROR if we were given an incomplete table */
+  return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK;        /* g == 1: codes of length 1 only are accepted as-is */
+}
+
+
+int inflate_trees_bits(c, bb, tb, hp, z)       /* build the table for the 19 bit-length codes */
+uIntf *c;               /* 19 code lengths */
+uIntf *bb;              /* bits tree desired/actual depth */
+inflate_huft * FAR *tb; /* bits tree result */
+inflate_huft *hp;       /* space for trees */
+z_streamp z;            /* for messages */
+{
+  int r;                /* Z_OK, Z_DATA_ERROR or Z_MEM_ERROR on return */
+  uInt hn = 0;          /* hufts used in space */
+  uIntf *v;             /* work area for huft_build */
+
+  if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL)
+    return Z_MEM_ERROR;
+  r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL,    /* s == n: every code is simple, so no d/e lists */
+                 tb, bb, hp, &hn, v);
+  if (r == Z_DATA_ERROR)
+    z->msg = (char*)"oversubscribed dynamic bit lengths tree";
+  else if (r == Z_BUF_ERROR || *bb == 0)        /* incomplete set, or all 19 lengths were zero */
+  {
+    z->msg = (char*)"incomplete dynamic bit lengths tree";
+    r = Z_DATA_ERROR;
+  }
+  ZFREE(z, v);
+  return r;
+}
+
+
+int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z)    /* build both tables of a dynamic block */
+uInt nl;                /* number of literal/length codes */
+uInt nd;                /* number of distance codes */
+uIntf *c;               /* that many (total) code lengths */
+uIntf *bl;              /* literal desired/actual bit depth */
+uIntf *bd;              /* distance desired/actual bit depth */
+inflate_huft * FAR *tl; /* literal/length tree result */
+inflate_huft * FAR *td; /* distance tree result */
+inflate_huft *hp;       /* space for trees */
+z_streamp z;            /* for messages */
+{
+  int r;                /* Z_OK, Z_DATA_ERROR or Z_MEM_ERROR on return */
+  uInt hn = 0;          /* hufts used in space */
+  uIntf *v;             /* work area for huft_build */
+
+  /* allocate work area */
+  if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
+    return Z_MEM_ERROR;
+
+  /* build literal/length tree */
+  r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v);
+  if (r != Z_OK || *bl == 0)
+  {
+    if (r == Z_DATA_ERROR)
+      z->msg = (char*)"oversubscribed literal/length tree";
+    else if (r != Z_MEM_ERROR)
+    {
+      z->msg = (char*)"incomplete literal/length tree";
+      r = Z_DATA_ERROR;
+    }
+    ZFREE(z, v);
+    return r;
+  }
+
+  /* build distance tree */
+  r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v);
+  if (r != Z_OK || (*bd == 0 && nl > 257))
+  {
+    if (r == Z_DATA_ERROR)              /* never tolerated, workaround or not */
+      z->msg = (char*)"oversubscribed distance tree";
+    else if (r == Z_BUF_ERROR) {
+#ifdef PKZIP_BUG_WORKAROUND
+      r = Z_OK;                         /* accept an incomplete distance tree */
+#else
+      z->msg = (char*)"incomplete distance tree";
+      r = Z_DATA_ERROR;
+#endif
+    }
+    else if (r != Z_MEM_ERROR)          /* r == Z_OK: no distance codes although nl > 257 */
+    {
+#ifndef PKZIP_BUG_WORKAROUND
+      z->msg = (char*)"empty distance tree with lengths";
+      r = Z_DATA_ERROR;
+#endif
+    }
+    if (r != Z_OK) { ZFREE(z, v); return r; }   /* propagate every error that was not waived */
+  }
+
+  /* done */
+  ZFREE(z, v);
+  return Z_OK;
+}
+
+
+/* build fixed tables only once--keep them here */
+#ifdef BUILDFIXED
+local int fixed_built = 0;       /* set to 1 once inflate_trees_fixed has built the tables */
+#define FIXEDH 544      /* number of hufts used by fixed tables */
+local inflate_huft fixed_mem[FIXEDH];   /* storage passed to huft_build as its tree space */
+local uInt fixed_bl;             /* literal/length bit depth (set to 9 before building) */
+local uInt fixed_bd;             /* distance bit depth (set to 5 before building) */
+local inflate_huft *fixed_tl;    /* literal/length table, filled in by huft_build */
+local inflate_huft *fixed_td;    /* distance table, filled in by huft_build */
+#else
+#include "inffixed.h"            /* precomputed fixed_bl, fixed_bd, fixed_tl and fixed_td */
+#endif
+
+
+int inflate_trees_fixed(bl, bd, tl, td, z)     /* hand out the fixed-code tables; Z_OK or Z_MEM_ERROR */
+uIntf *bl;               /* literal desired/actual bit depth */
+uIntf *bd;               /* distance desired/actual bit depth */
+inflate_huft * FAR *tl;  /* literal/length tree result */
+inflate_huft * FAR *td;  /* distance tree result */
+z_streamp z;             /* for memory allocation */
+{
+#ifdef BUILDFIXED
+  /* build fixed tables if not already */
+  if (!fixed_built)     /* NOTE(review): plain static flag with no locking visible -- confirm single-threaded first use */
+  {
+    int k;              /* temporary variable */
+    uInt f = 0;         /* number of hufts used in fixed_mem */
+    uIntf *c;           /* length list for huft_build */
+    uIntf *v;           /* work area for huft_build */
+
+    /* allocate memory */
+    if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
+      return Z_MEM_ERROR;
+    if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
+    {
+      ZFREE(z, c);
+      return Z_MEM_ERROR;
+    }
+
+    /* literal table */
+    for (k = 0; k < 144; k++)
+      c[k] = 8;
+    for (; k < 256; k++)
+      c[k] = 9;
+    for (; k < 280; k++)
+      c[k] = 7;
+    for (; k < 288; k++)
+      c[k] = 8;
+    fixed_bl = 9;
+    huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl,      /* return value is not checked */
+               fixed_mem, &f, v);
+
+    /* distance table */
+    for (k = 0; k < 30; k++)
+      c[k] = 5;
+    fixed_bd = 5;
+    huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd,         /* return value is not checked */
+               fixed_mem, &f, v);
+
+    /* done */
+    ZFREE(z, v);
+    ZFREE(z, c);
+    fixed_built = 1;
+  }
+#endif
+  *bl = fixed_bl;
+  *bd = fixed_bd;
+  *tl = fixed_tl;       /* callers receive pointers to the shared static tables */
+  *td = fixed_td;
+  return Z_OK;
+}
diff --git a/ZLIB/inftrees.h b/ZLIB/inftrees.h
new file mode 100644
index 0000000..04b73b7
--- /dev/null
+++ b/ZLIB/inftrees.h
@@ -0,0 +1,58 @@
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* Huffman code lookup table entry--this entry is four bytes for machines
+   that have 16-bit pointers (e.g. PC's in the small or medium model). */
+
+typedef struct inflate_huft_s FAR inflate_huft;  /* one decoding table slot */
+
+struct inflate_huft_s {
+  union {               /* Exop/Bits overlaid on a uInt used only as padding */
+    struct {
+      Byte Exop;        /* number of extra bits or operation */
+      Byte Bits;        /* number of bits in this code or subcode */
+    } what;             /* the two meaningful bytes of the union */
+    uInt pad;           /* pad structure to a power of 2 (4 bytes for */
+  } word;               /*  16-bit, 8 bytes for 32-bit int's) */
+  uInt base;            /* literal, length base, distance base,
+                           or table offset */
+};
+
+/* Maximum size of dynamic tree.  The maximum found in a long but non-
+   exhaustive search was 1004 huft structures (850 for length/literals
+   and 154 for distances, the latter actually the result of an
+   exhaustive search).  The actual maximum is not known, but the
+   value below is more than safe. */
+#define MANY 1440               /* counted in inflate_huft structures */
+
+extern int inflate_trees_bits OF((
+    uIntf *,                    /* 19 code lengths */
+    uIntf *,                    /* bits tree desired/actual depth */
+    inflate_huft * FAR *,       /* bits tree result */
+    inflate_huft *,             /* space for trees (caller-provided) */
+    z_streamp));                /* for messages */
+
+extern int inflate_trees_dynamic OF((
+    uInt,                       /* number of literal/length codes */
+    uInt,                       /* number of distance codes */
+    uIntf *,                    /* that many (total) code lengths */
+    uIntf *,                    /* literal desired/actual bit depth */
+    uIntf *,                    /* distance desired/actual bit depth */
+    inflate_huft * FAR *,       /* literal/length tree result */
+    inflate_huft * FAR *,       /* distance tree result */
+    inflate_huft *,             /* space for trees (caller-provided) */
+    z_streamp));                /* for messages */
+
+extern int inflate_trees_fixed OF((     /* Z_OK, or Z_MEM_ERROR (BUILDFIXED) */
+    uIntf *,                    /* literal desired/actual bit depth */
+    uIntf *,                    /* distance desired/actual bit depth */
+    inflate_huft * FAR *,       /* literal/length tree result */
+    inflate_huft * FAR *,       /* distance tree result */
+    z_streamp));                /* for memory allocation */
diff --git a/ZLIB/infutil.c b/ZLIB/infutil.c
new file mode 100644
index 0000000..9a07622
--- /dev/null
+++ b/ZLIB/infutil.c
@@ -0,0 +1,87 @@
+/* inflate_util.c -- data and routines common to blocks and codes
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+#include "zutil.h"
+#include "infblock.h"
+#include "inftrees.h"
+#include "infcodes.h"
+#include "infutil.h"
+
+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
+
+/* inflate_mask[n] has the n lowest bits set (n = 0..16); AND keeps those bits */
+uInt inflate_mask[17] = {
+    0x0000,                                                         /* n = 0 */
+    0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, /* 1..8 */
+    0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff  /* 9..16 */
+};
+
+
+/* Drain pending bytes from the sliding window into z->next_out.
+ *
+ * Bytes between s->read and s->write are copied, limited by z->avail_out.
+ * When the pending data wraps around the end of the window it is moved in
+ * two contiguous runs: first up to s->end, then from s->window onwards.
+ * Each run advances z->total_out, shrinks z->avail_out and, if a check
+ * function is installed, folds the copied bytes into s->check / z->adler.
+ *
+ * r is the status to report; a Z_BUF_ERROR is downgraded to Z_OK as soon
+ * as at least one byte has been copied.  Returns the resulting status.
+ */
+int inflate_flush(s, z, r)
+inflate_blocks_statef *s;
+z_streamp z;
+int r;
+{
+  Bytef *dst = z->next_out;     /* where the next output byte goes */
+  Bytef *src = s->read;         /* next window byte to hand out */
+  Bytef *stop;                  /* one past the last byte of this run */
+  uInt cnt;                     /* bytes moved in this run */
+  int run;                      /* 0: up to write/end, 1: after wrapping */
+
+  for (run = 0; run < 2; run++)
+  {
+    if (run == 0)
+    {
+      /* first run stops at the write pointer, or at the end of the
+         window when the write pointer lies before the read pointer */
+      stop = (src <= s->write) ? s->write : s->end;
+    }
+    else
+    {
+      /* a second run only exists if the first one reached the end */
+      if (src != s->end)
+        break;
+
+      /* wrap the read pointer (and the write pointer, if it sits at
+         the very end) back to the start of the window */
+      src = s->window;
+      if (s->write == s->end)
+        s->write = s->window;
+      stop = s->write;
+    }
+
+    /* never copy more than the caller has room for */
+    cnt = (uInt)(stop - src);
+    if (cnt > z->avail_out)
+      cnt = z->avail_out;
+    if (cnt != 0 && r == Z_BUF_ERROR)
+      r = Z_OK;
+
+    /* account for the bytes, checksum them, then move them */
+    z->avail_out -= cnt;
+    z->total_out += cnt;
+    if (s->checkfn != Z_NULL)
+      z->adler = s->check = (*s->checkfn)(s->check, src, cnt);
+    zmemcpy(dst, src, cnt);
+    dst += cnt;
+    src += cnt;
+  }
+
+  /* publish the advanced pointers */
+  z->next_out = dst;
+  s->read = src;
+  return r;
+}
diff --git a/ZLIB/infutil.h b/ZLIB/infutil.h
new file mode 100644
index 0000000..4401df8
--- /dev/null
+++ b/ZLIB/infutil.h
@@ -0,0 +1,98 @@
+/* infutil.h -- types and macros common to blocks and codes
+ * Copyright (C) 1995-2002 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+#ifndef _INFUTIL_H
+#define _INFUTIL_H
+
+typedef enum {  /* values stored in inflate_blocks_state.mode */
+      TYPE,     /* get type bits (3, including end bit) */
+      LENS,     /* get lengths for stored */
+      STORED,   /* processing stored block */
+      TABLE,    /* get table lengths */
+      BTREE,    /* get bit lengths tree for a dynamic block */
+      DTREE,    /* get length, distance trees for a dynamic block */
+      CODES,    /* processing fixed or dynamic block */
+      DRY,      /* output remaining window bytes */
+      DONE,     /* finished last block, done */
+      BAD}      /* got a data error--stuck here */
+inflate_block_mode;
+
+/* inflate blocks semi-private state (accessed through the macros below) */
+struct inflate_blocks_state {
+
+  /* mode */
+  inflate_block_mode  mode;     /* current inflate_block mode */
+
+  /* mode dependent information */
+  union {
+    uInt left;          /* if STORED, bytes left to copy */
+    struct {
+      uInt table;               /* table lengths (14 bits) */
+      uInt index;               /* index into blens (or border) */
+      uIntf *blens;             /* bit lengths of codes */
+      uInt bb;                  /* bit length tree depth */
+      inflate_huft *tb;         /* bit length decoding tree */
+    } trees;            /* if DTREE, decoding info for trees */
+    struct {
+      inflate_codes_statef      /* state of the codes decoder */
+         *codes;
+    } decode;           /* if CODES, current state */
+  } sub;                /* submode */
+  uInt last;            /* true if this block is the last block */
+
+  /* mode independent information */
+  uInt bitk;            /* bits in bit buffer (saved by UPDBITS) */
+  uLong bitb;           /* bit buffer (saved by UPDBITS) */
+  inflate_huft *hufts;  /* single malloc for tree space */
+  Bytef *window;        /* sliding window */
+  Bytef *end;           /* one byte after sliding window */
+  Bytef *read;          /* window read pointer (advanced by inflate_flush) */
+  Bytef *write;         /* window write pointer (saved by UPDOUT) */
+  check_func checkfn;   /* check function, or Z_NULL for no checking */
+  uLong check;          /* check on output */
+
+};
+
+
+/* defines for inflate input/output; they use the caller's locals s, z, r, p, n, b, k, q and m */
+/*   update pointers and return (LEAVE returns from the enclosing function) */
+#define UPDBITS {s->bitb=b;s->bitk=k;}
+#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;}
+#define UPDOUT {s->write=q;}
+#define UPDATE {UPDBITS UPDIN UPDOUT}
+#define LEAVE {UPDATE return inflate_flush(s,z,r);}
+/*   get bytes and bits (NEEDBYTE and NEEDBITS LEAVE when no input is left) */
+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
+#define NEEDBYTE {if(n)r=Z_OK;else LEAVE}
+#define NEXTBYTE (n--,*p++)
+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define DUMPBITS(j) {b>>=(j);k-=(j);}
+/*   output bytes (NEEDOUT wraps, then flushes, then LEAVEs if still no room) */
+#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
+#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
+#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
+#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT}
+#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;}
+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
+/*   load local pointers (both the input and the output set) */
+#define LOAD {LOADIN LOADOUT}
+
+/* masks for lower bits (size given to avoid silly warnings with Visual C++) */
+extern uInt inflate_mask[17];
+
+/* copy as much as possible from the sliding window to the output area */
+extern int inflate_flush OF((
+    inflate_blocks_statef *,    /* block state that owns the window */
+    z_streamp ,                 /* stream whose next_out receives the bytes */
+    int));                      /* status to return (Z_BUF_ERROR -> Z_OK on copy) */
+
+struct internal_state      {int dummy;}; /* for buggy compilers */
+
+#endif
diff --git a/ZLIB/trees.c b/ZLIB/trees.c
new file mode 100644
index 0000000..1942c18
--- /dev/null
+++ b/ZLIB/trees.c
@@ -0,0 +1,1214 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2002 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values).  The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id: trees.c,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10    17
+/* repeat a zero length 3-10 times  (3 bits of repeat count) */
+
+#define REPZ_11_138  18
+/* repeat a zero length 11-138 times  (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; /* only codes 16, 17, 18 have any */
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char)) /* 16; has type size_t, hence the (int) casts */
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h, so tr_static_init fills these */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * the codes 286 and 287 are needed to build a canonical tree (see
+ * tr_static_init below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values are indexed directly by the
+ * normalized distances 0 .. 255, the last 256 values are indexed by the
+ * normalized distance divided by 128 (see tr_static_init below).
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local static_tree_desc  static_l_desc =   /* literal/length tree */
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local static_tree_desc  static_d_desc =   /* distance tree */
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
+
+local static_tree_desc  static_bl_desc =  /* bit length tree: no static tree */
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file, declared ahead of their definitions.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, ct_data *ltree,
+                              ct_data *dtree));
+local void set_data_type  OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
+                              int header));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));  /* writes the file trees.h */
+#endif
+
+#ifndef DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects */
+   /* (both are expanded twice: once for .Code and once for .Len) */
+#else /* DEBUG: same, but first traces the code number when z_verbose > 2 */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream: the low byte, then the high byte.
+ * IN assertion: there is enough room in pendingBuf; w is expanded twice.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits, appending it above the bi_valid
+ * bits already held in bi_buf. IN assertion: length <= 16 and value fits in
+ * length bits (the DEBUG function below asserts 1 <= length <= 15). */
+#ifdef DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send */
+    int length; /* number of bits */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length");
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+     * unused bits in value.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (value << s->bi_valid);
+        put_short(s, s->bi_buf);                /* emit the full buffer */
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); /* leftover bits */
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !DEBUG */
+/* Macro form without the trace, assertion and bits_sent accounting. */
+#define send_bits(s, value, length) \
+{ int len = length;\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = value;\
+    s->bi_buf |= (val << s->bi_valid);\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* DEBUG */
+
+
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+/* larger of a and b; both are expanded twice, so no side effects allowed */
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables.
+ */
+local void tr_static_init()
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+    static int static_init_done = 0; /* nonzero once the tables are built */
+    ush bl_count[MAX_BITS+1];  /* number of literal codes per bit length */
+    int code;     /* length or distance code being expanded */
+    int length;   /* next normalized match length to map */
+    int dist;     /* next normalized distance (or distance/128) to map */
+    int run;      /* table slots still to fill for the current code */
+    int sym;      /* symbol of the static literal or distance tree */
+    int bits;     /* bit length selecting a bl_count slot */
+
+    if (static_init_done) return;
+
+    /* For some embedded targets, global variables are not initialized: */
+    static_bl_desc.extra_bits = extra_blbits;
+    static_d_desc.extra_bits = extra_dbits;
+    static_d_desc.static_tree = static_dtree;
+    static_l_desc.extra_bits = extra_lbits;
+    static_l_desc.static_tree = static_ltree;
+
+    /* Map each normalized length (0..255) to its length code (0..28) */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (run = 1 << extra_lbits[code]; run > 0; run--) {
+            _length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Length 255 (match length 258) could be sent as code 284 + 5 bits
+     * or as code 285; point the last entry at the better encoding:
+     */
+    _length_code[length-1] = (uch)code;
+
+    /* Map distances to distance codes (0..29): codes 0..15 are indexed by
+     * the distance itself, the remaining ones by the distance over 128. */
+    dist = 0;
+    for (code = 0; code < 16; code++) {
+        base_dist[code] = dist;
+        for (run = 1 << extra_dbits[code]; run > 0; run--) {
+            _dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    while (code < D_CODES) {
+        base_dist[code] = dist << 7;
+        for (run = 1 << (extra_dbits[code]-7); run > 0; run--) {
+            _dist_code[256 + dist++] = (uch)code;
+        }
+        code++;
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+    /* Static literal tree: symbols 0..143 use 8 bits, 144..255 use 9,
+     * 256..279 use 7 and 280..287 use 8.  Codes 286 and 287 do not exist,
+     * but are included to get a canonical Huffman tree (longest code all
+     * ones). */
+    for (bits = MAX_BITS; bits >= 0; bits--) bl_count[bits] = 0;
+    for (sym = 0; sym <= 287; sym++) {
+        bits = sym <= 143 ? 8 : sym <= 255 ? 9 : sym <= 279 ? 7 : 8;
+        static_ltree[sym].Len = (ush)bits;
+        bl_count[bits]++;
+    }
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: every code is 5 bits long */
+    for (sym = 0; sym < D_CODES; sym++) {
+        static_dtree[sym].Code = bi_reverse((unsigned)sym, 5);
+        static_dtree[sym].Len = 5;
+    }
+    static_init_done = 1;
+
+#  ifdef GEN_TREES_H
+    gen_trees_header();
+#  endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+#  ifndef DEBUG
+#    include <stdio.h>
+#  endif
+
+/* text after element i: array terminator, line-ending comma, or comma */
+#  define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" :    \
+       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+/* Write the tables filled in by tr_static_init to the file "trees.h" as C
+   array definitions.  Writes nothing if the file cannot be opened. */
+local void gen_trees_header()
+{
+    FILE *header = fopen("trees.h", "w");
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    /* Assert is presumably a no-op without DEBUG: never print to a null stream */
+    if (header == NULL) return;
+
+    fprintf(header,
+            "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++)
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++)
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+
+    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++)
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+
+    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++)
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++)
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++)
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void _tr_init(s)
+    deflate_state *s;    /* deflate state whose tree fields are reset */
+{
+    /* fill the static tables first, if this build creates them at run time */
+    tr_static_init();
+
+    /* each descriptor pairs one of the stream's dynamic trees ... */
+    s->l_desc.dyn_tree  = s->dyn_ltree;
+    s->d_desc.dyn_tree  = s->dyn_dtree;
+    s->bl_desc.dyn_tree = s->bl_tree;
+
+    /* ... with the matching file-wide static description */
+    s->l_desc.stat_desc  = &static_l_desc;
+    s->d_desc.stat_desc  = &static_d_desc;
+    s->bl_desc.stat_desc = &static_bl_desc;
+
+#ifdef DEBUG
+    s->bits_sent = 0L;
+    s->compressed_len = 0L;
+#endif
+    s->last_eob_len = 8; /* enough lookahead for inflate */
+    s->bi_valid = 0;     /* the bit buffer starts out empty */
+    s->bi_buf = 0;
+    init_block(s);       /* Initialize the first block of the first file */
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(s)
+    deflate_state *s;    /* state whose per-block statistics are cleared */
+{
+    int i; /* index into the frequency table being cleared */
+
+    /* Forget the symbol frequencies gathered so far in all three trees. */
+    for (i = L_CODES;  i-- > 0; ) s->dyn_ltree[i].Freq = 0;
+    for (i = D_CODES;  i-- > 0; ) s->dyn_dtree[i].Freq = 0;
+    for (i = BL_CODES; i-- > 0; ) s->bl_tree[i].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;  /* count the end of block code once */
+    s->static_len = 0L;  s->opt_len = 0L;
+    s->matches = 0;      s->last_lit = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+/* (the heap is 1-based: heap[0] is not used) */
+
+/* ===========================================================================
+ * Remove the smallest element from the heap into top and recreate the heap
+ * with one less element. Updates heap and heap_len; s is expanded repeatedly.
+ */
+#define pqremove(s, tree, top) \
+{\
+    top = s->heap[SMALLEST]; \
+    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+    pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ * Nonzero when node n sorts no later than node m; n and m are expanded twice+ */
+#define smaller(tree, n, m, depth) \
+   (tree[n].Freq < tree[m].Freq || \
+   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+local void pqdownheap(s, tree, k)
+    deflate_state *s;
+    ct_data *tree;  /* tree whose Freq fields order the heap */
+    int k;          /* heap slot of the node to sift down */
+{
+    int moving = s->heap[k];  /* node being sifted; stored once at the end */
+    int son;                  /* candidate child slot, starting at the left */
+
+    for (son = k << 1; son <= s->heap_len; son <<= 1) {
+        /* prefer the right son when it exists and is the smaller one */
+        if (son < s->heap_len &&
+            smaller(tree, s->heap[son+1], s->heap[son], s->depth)) {
+            son++;
+        }
+
+        /* heap order holds once the sifted node beats its smallest son */
+        if (smaller(tree, moving, s->heap[son], s->depth)) break;
+
+        /* otherwise pull that son up one level and descend into its slot */
+        s->heap[k] = s->heap[son];
+        k = son;
+    }
+    s->heap[k] = moving;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ *    above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ *     array bl_count contains the frequencies for each bit length.
+ *     The length opt_len is updated; static_len is also updated if stree is
+ *     not null.
+ */
+local void gen_bitlen(s, desc)
+    deflate_state *s;   /* supplies heap[]; bl_count, opt_len, static_len updated */
+    tree_desc *desc;    /* the tree descriptor */
+{
+    ct_data *tree        = desc->dyn_tree;
+    int max_code         = desc->max_code;
+    const ct_data *stree = desc->stat_desc->static_tree; /* may be null */
+    const intf *extra    = desc->stat_desc->extra_bits;
+    int base             = desc->stat_desc->extra_base; /* first code with extra bits */
+    int max_length       = desc->stat_desc->max_length;
+    int h;              /* heap index */
+    int n, m;           /* iterate over the tree elements */
+    int bits;           /* bit length */
+    int xbits;          /* extra bits */
+    ush f;              /* frequency */
+    int overflow = 0;   /* number of elements with bit length too large */
+
+    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+    /* In a first pass, compute the optimal bit lengths (which may
+     * overflow in the case of the bit length tree).
+     */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1;   /* one level below the father */
+        if (bits > max_length) bits = max_length, overflow++; /* clamp now, repair below */
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n-base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (bits + xbits);
+        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+    }
+    if (overflow == 0) return;  /* every length already fits in max_length */
+
+    Trace((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length-1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;      /* move one leaf down the tree */
+        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits];  /* leaves that must end up with this length */
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue; /* internal node: uses up no count */
+            if (tree[m].Len != (unsigned) bits) {
+                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((long)bits - (long)tree[m].Len)
+                              *(long)tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+    ct_data *tree;             /* tree whose Code fields are filled in */
+    int max_code;              /* largest code with non zero frequency */
+    ushf *bl_count;            /* number of codes at each bit length */
+{
+    ush next_code[MAX_BITS+1]; /* first unassigned code of each length */
+    ush code = 0;              /* running code value */
+    int bits = 0;              /* bit length whose first code is computed */
+    int sym;                   /* symbol receiving its code */
+
+    /* Derive the first code of every length, not yet bit-reversed, from
+     * the distribution counts of the shorter lengths. */
+    while (++bits <= MAX_BITS) {
+        code = (ush)((code + bl_count[bits-1]) << 1);
+        next_code[bits] = code;
+    }
+    /* The counts are consistent only if the last code is all ones. */
+    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (sym = 0; sym <= max_code; sym++) {
+        int len = tree[sym].Len;
+        if (len != 0) {
+            /* hand out the next code of this length, with its bits reversed */
+            tree[sym].Code = bi_reverse(next_code[len]++, len);
+            Tracecv(tree != static_ltree,
+                    (stderr,"\nn %3d %c l %2d c %4x (%x) ", sym,
+                     (isgraph(sym) ? sym : ' '), len, tree[sym].Code,
+                     next_code[len]-1));
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+    deflate_state *s; /* supplies the heap, depth[] and running bit totals */
+    tree_desc *desc; /* the tree descriptor */
+{
+    ct_data *tree         = desc->dyn_tree;
+    const ct_data *stree  = desc->stat_desc->static_tree;
+    int elems             = desc->stat_desc->elems;
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n; /* code is in use */
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0; /* unused code gets no bits */
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+        tree[node].Freq = 1;
+        s->depth[node] = 0;
+        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+        /* node is 0 or 1 so it does not have extra bits */
+    }
+    desc->max_code = max_code; /* read back later through the descriptor */
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+     * establish sub-heaps of increasing lengths:
+     */
+    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+    /* Construct the Huffman tree by repeatedly combining the least two
+     * frequent nodes.
+     */
+    node = elems;              /* next internal node of the tree */
+    do {
+        pqremove(s, tree, n);  /* n = node of least frequency */
+        m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+        s->heap[--(s->heap_max)] = m;
+
+        /* Create a new node father of n and m */
+        tree[node].Freq = tree[n].Freq + tree[m].Freq;
+        s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
+        tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+        if (tree == s->bl_tree) {
+            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+        }
+#endif
+        /* and insert the new node in the heap */
+        s->heap[SMALLEST] = node++;
+        pqdownheap(s, tree, SMALLEST);
+
+    } while (s->heap_len >= 2);
+
+    s->heap[--(s->heap_max)] = s->heap[SMALLEST]; /* last node left */
+
+    /* At this point, the fields freq and dad are set. We can now
+     * generate the bit lengths.
+     */
+    gen_bitlen(s, (tree_desc *)desc);
+
+    /* The field len is now set, we can generate the bit codes */
+    gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local void scan_tree (s, tree, max_code)
+    deflate_state *s; /* bl_tree frequencies are accumulated in here */
+    ct_data *tree;   /* the tree to be scanned */
+    int max_code;    /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    if (nextlen == 0) max_count = 138, min_count = 3;
+    tree[max_code+1].Len = (ush)0xffff; /* guard, so the final run ends */
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run goes on, keep counting */
+        } else if (count < min_count) {
+            s->bl_tree[curlen].Freq += count; /* too short: plain lengths */
+        } else if (curlen != 0) {
+            if (curlen != prevlen) s->bl_tree[curlen].Freq++; /* new length */
+            s->bl_tree[REP_3_6].Freq++;
+        } else if (count <= 10) {
+            s->bl_tree[REPZ_3_10].Freq++; /* zero run of at most 10 */
+        } else {
+            s->bl_tree[REPZ_11_138].Freq++; /* longer zero run */
+        }
+        count = 0; prevlen = curlen; /* start the next run */
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3; /* length was already emitted */
+        } else {
+            max_count = 7, min_count = 4; /* one plain code + 3..6 repeats */
+        }
+    }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local void send_tree (s, tree, max_code)
+    deflate_state *s; /* supplies bl_tree and receives the output bits */
+    ct_data *tree; /* the tree to be sent */
+    int max_code;       /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    /* tree[max_code+1].Len = -1; */  /* guard already set by scan_tree */
+    if (nextlen == 0) max_count = 138, min_count = 3;
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run goes on, keep counting */
+        } else if (count < min_count) {
+            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+        } else if (curlen != 0) {
+            if (curlen != prevlen) {
+                send_code(s, curlen, s->bl_tree); count--; /* length once */
+            }
+            Assert(count >= 3 && count <= 6, " 3_6?");
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+        } else if (count <= 10) {
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+        } else {
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+        }
+        count = 0; prevlen = curlen; /* start the next run */
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3; /* length was already sent */
+        } else {
+            max_count = 7, min_count = 4; /* one plain code + 3..6 repeats */
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local int build_bl_tree(s)
+    deflate_state *s;
+{
+    int last_used;  /* position in bl_order of the final length to transmit */
+
+    /* Tally, over both the literal and the distance tree, how often each
+     * bit length symbol is needed to describe them.
+     */
+    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+    /* Turn those tallies into the bit length tree. Afterwards opt_len covers
+     * the tree representations, but neither the lengths of the bit length
+     * codes nor the 5+5+4 bits for the counts.
+     */
+    build_tree(s, (tree_desc *)&s->bl_desc);
+
+    /* Drop trailing bl_order entries whose code length is zero, but never
+     * go below index 3: the pkzip format wants at least 4 bit length codes.
+     */
+    last_used = BL_CODES-1;
+    while (last_used >= 3 && s->bl_tree[bl_order[last_used]].Len == 0)
+        last_used--;
+    /* Account for the bit length codes (3 bits each) and the counts */
+    s->opt_len += 3*(last_used+1) + 5+5+4;
+    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+            s->opt_len, s->static_len));
+    return last_used;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+    deflate_state *s; /* supplies the three dynamic trees to be written */
+    int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+    int rank;                    /* index in bl_order */
+
+    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+            "too many codes");
+    Tracev((stderr, "\nbl counts: "));
+    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes-1,   5);
+    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+    for (rank = 0; rank < blcodes; rank++) {
+        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); /* 3 bits each */
+    }
+    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void _tr_stored_block(s, buf, stored_len, eof)
+    deflate_state *s; /* deflate state the block is written through */
+    charf *buf;       /* input block */
+    ulg stored_len;   /* length of input block, in bytes */
+    int eof;          /* true if this is the last block for a file */
+{
+    send_bits(s, (STORED_BLOCK<<1)+eof, 3);  /* send block type */
+#ifdef DEBUG
+    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+    s->compressed_len += (stored_len + 4) << 3; /* data + 4 length bytes */
+#endif
+    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ * The current inflate code requires 9 bits of lookahead. If the
+ * last two codes for the previous block (real code plus EOB) were coded
+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
+ * the last real code. In this case we send two empty static blocks instead
+ * of one. (There are no problems if the previous block is stored or fixed.)
+ * To simplify the code, we assume the worst case of last real code encoded
+ * on one bit only.
+ */
+void _tr_align(s)
+    deflate_state *s; /* deflate state; reads and updates last_eob_len */
+{
+    send_bits(s, STATIC_TREES<<1, 3);
+    send_code(s, END_BLOCK, static_ltree);
+#ifdef DEBUG
+    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+#endif
+    bi_flush(s);
+    /* Of the 10 bits for the empty block, we have already sent
+     * (10 - bi_valid) bits. The lookahead for the last real code (before
+     * the EOB of the previous block) was thus at least one plus the length
+     * of the EOB plus what we have just sent of the empty static block.
+     */
+    if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
+        send_bits(s, STATIC_TREES<<1, 3); /* second empty static block */
+        send_code(s, END_BLOCK, static_ltree);
+#ifdef DEBUG
+        s->compressed_len += 10L;
+#endif
+        bi_flush(s);
+    }
+    s->last_eob_len = 7; /* the static EOB code just sent is 7 bits long */
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file.
+ */
+void _tr_flush_block(s, buf, stored_len, eof)
+    deflate_state *s; /* deflate state holding the tallied block */
+    charf *buf;       /* input block, or NULL if too old */
+    ulg stored_len;   /* length of input block */
+    int eof;          /* true if this is the last block for a file */
+{
+    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+    int max_blindex = 0;  /* index of last bit length code of non zero freq */
+
+    /* Build the Huffman trees unless a stored block is forced */
+    if (s->level > 0) {
+
+	 /* Check if the file is ascii or binary */
+	if (s->data_type == Z_UNKNOWN) set_data_type(s);
+
+	/* Construct the literal and distance trees */
+	build_tree(s, (tree_desc *)(&(s->l_desc)));
+	Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+		s->static_len));
+
+	build_tree(s, (tree_desc *)(&(s->d_desc)));
+	Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+		s->static_len));
+	/* At this point, opt_len and static_len are the total bit lengths of
+	 * the compressed block data, excluding the tree representations.
+	 */
+
+	/* Build the bit length tree for the above two trees, and get the index
+	 * in bl_order of the last bit length code to send.
+	 */
+	max_blindex = build_bl_tree(s);
+
+	/* Determine the best encoding. Compute first the block length in bytes */
+	opt_lenb = (s->opt_len+3+7)>>3; /* 3 block type bits, rounded up */
+	static_lenb = (s->static_len+3+7)>>3;
+
+	Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+		opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+		s->last_lit));
+
+	if (static_lenb <= opt_lenb) opt_lenb = static_lenb; /* tie: static */
+
+    } else {
+        Assert(buf != (char*)0, "lost buf");
+	opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+    }
+
+#ifdef FORCE_STORED
+    if (buf != (char*)0) { /* force stored block */
+#else
+    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+                       /* 4: two words for the lengths */
+#endif
+        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+         * Otherwise we can't have processed more than WSIZE input bytes since
+         * the last block flush, because compression would have been
+         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+         * transform a block into a stored block.
+         */
+        _tr_stored_block(s, buf, stored_len, eof);
+
+#ifdef FORCE_STATIC
+    } else if (static_lenb >= 0) { /* force static trees */
+#else
+    } else if (static_lenb == opt_lenb) {
+#endif
+        send_bits(s, (STATIC_TREES<<1)+eof, 3);
+        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->static_len;
+#endif
+    } else {
+        send_bits(s, (DYN_TREES<<1)+eof, 3);
+        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+                       max_blindex+1);
+        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->opt_len;
+#endif
+    }
+    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+    /* The above check is made mod 2^32, for files larger than 512 MB
+     * and uLong implemented on 32 bits.
+     */
+    init_block(s);
+
+    if (eof) {
+        bi_windup(s);
+#ifdef DEBUG
+        s->compressed_len += 7;  /* align on byte boundary */
+#endif
+    }
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+           s->compressed_len-7*eof));
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int _tr_tally (s, dist, lc)
+    deflate_state *s; /* deflate state owning d_buf, l_buf and the trees */
+    unsigned dist;  /* distance of matched string */
+    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
+{
+    s->d_buf[s->last_lit] = (ush)dist; /* a stored 0 marks a literal */
+    s->l_buf[s->last_lit++] = (uch)lc;
+    if (dist == 0) {
+        /* lc is the unmatched char */
+        s->dyn_ltree[lc].Freq++;
+    } else {
+        s->matches++;
+        /* Here, lc is the match length - MIN_MATCH */
+        dist--;             /* dist = match distance - 1 */
+        Assert((ush)dist < (ush)MAX_DIST(s) &&
+               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
+
+        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; /* length symbol */
+        s->dyn_dtree[d_code(dist)].Freq++;
+    }
+
+#ifdef TRUNCATE_BLOCK
+    /* Try to guess if it is profitable to stop the current block here */
+    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
+        /* Compute an upper bound for the compressed length */
+        ulg out_length = (ulg)s->last_lit*8L;
+        ulg in_length = (ulg)((long)s->strstart - s->block_start);
+        int dcode;
+        for (dcode = 0; dcode < D_CODES; dcode++) {
+            out_length += (ulg)s->dyn_dtree[dcode].Freq *
+                (5L+extra_dbits[dcode]);
+        }
+        out_length >>= 3; /* bits to bytes */
+        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+               s->last_lit, in_length, out_length,
+               100L - out_length*100L/in_length));
+        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+    }
+#endif
+    return (s->last_lit == s->lit_bufsize-1);
+    /* We avoid equality with lit_bufsize because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(s, ltree, dtree)
+    deflate_state *s; /* supplies d_buf, l_buf and last_lit to replay */
+    ct_data *ltree; /* literal tree */
+    ct_data *dtree; /* distance tree */
+{
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned lx = 0;    /* running index in l_buf */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->last_lit != 0) do { /* replay every tallied symbol */
+        dist = s->d_buf[lx];
+        lc = s->l_buf[lx++];
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code+LITERALS+1, ltree); /* send the length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+        Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
+
+    } while (lx < s->last_lit);
+
+    send_code(s, END_BLOCK, ltree);
+    s->last_eob_len = ltree[END_BLOCK].Len; /* consulted by _tr_align */
+}
+
+/* ===========================================================================
+ * Set the data type to ASCII or BINARY, using a crude approximation:
+ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all
+ * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
+ */
+local void set_data_type(s)
+    deflate_state *s;
+{
+    unsigned text = 0;   /* occurrences of literal bytes 7..127 */
+    unsigned other = 0;  /* occurrences of bytes 0..6 and 128..LITERALS-1 */
+    int c;               /* literal byte value being tallied */
+    for (c = 0; c < 7; c++)    other += s->dyn_ltree[c].Freq;
+    for (; c < 128; c++)       text  += s->dyn_ltree[c].Freq;
+    for (; c < LITERALS; c++)  other += s->dyn_ltree[c].Freq;
+    s->data_type = (Byte)((text >> 2) < other ? Z_BINARY : Z_ASCII);
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(code, len)
+    unsigned code; /* value whose low len bits are mirrored */
+    int len;       /* how many bits take part (1..15 per the IN assertion) */
+{
+    unsigned mirrored = 0; /* bits of code collected in opposite order */
+    do {
+        mirrored = (mirrored << 1) | (code & 1);
+        code >>= 1;
+    } while (--len > 0);
+    return mirrored;
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(s)
+    deflate_state *s; /* state holding bi_buf and its bit count bi_valid */
+{
+    if (s->bi_valid == 16) { /* 16 bits pending: write them as a short */
+        put_short(s, s->bi_buf);
+        s->bi_buf = 0;
+        s->bi_valid = 0;
+    } else if (s->bi_valid >= 8) { /* write the low byte, keep the rest */
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(s)
+    deflate_state *s; /* state holding bi_buf and its bit count bi_valid */
+{
+    if (s->bi_valid > 8) { /* more than one byte of bits is pending */
+        put_short(s, s->bi_buf);
+    } else if (s->bi_valid > 0) { /* 1..8 bits pending: one byte is enough */
+        put_byte(s, (Byte)s->bi_buf);
+    }
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef DEBUG
+    s->bits_sent = (s->bits_sent+7) & ~7; /* round up to a multiple of 8 */
+#endif
+}
+
+/* ===========================================================================
+ * Copy a stored block, storing first the length and its
+ * one's complement if requested.
+ */
+local void copy_block(s, buf, len, header)
+    deflate_state *s; /* deflate state the bytes are written through */
+    charf    *buf;    /* the input data */
+    unsigned len;     /* its length */
+    int      header;  /* true if block header must be written */
+{
+    bi_windup(s);        /* align on byte boundary */
+    s->last_eob_len = 8; /* enough lookahead for inflate */
+
+    if (header) {
+        put_short(s, (ush)len);   /* the length, truncated to 16 bits */
+        put_short(s, (ush)~len);  /* and its one's complement */
+#ifdef DEBUG
+        s->bits_sent += 2*16;
+#endif
+    }
+#ifdef DEBUG
+    s->bits_sent += (ulg)len<<3;
+#endif
+    while (len--) { /* copy the data bytes one at a time */
+        put_byte(s, *buf++);
+    }
+}
diff --git a/ZLIB/trees.h b/ZLIB/trees.h
new file mode 100644
index 0000000..72facf9
--- /dev/null
+++ b/ZLIB/trees.h
@@ -0,0 +1,128 @@
+/* header created automatically with -DGEN_TREES_H */
+
+local const ct_data static_ltree[L_CODES+2] = { /* literal/length tree of STATIC_TREES blocks */
+{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
+{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
+{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
+{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
+{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
+{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
+{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
+{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
+{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
+{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
+{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
+{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
+{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
+{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
+{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
+{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
+{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
+{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
+{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
+{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
+{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
+{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
+{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
+{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
+{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
+{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
+{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
+{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
+{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
+{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
+{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
+{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
+{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
+{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
+{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
+{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
+{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
+{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
+{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
+{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
+{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
+{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
+{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
+{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
+{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
+{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
+{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
+{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
+{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
+{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
+{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
+{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
+{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
+{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
+{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
+{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
+{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
+{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
+};
+
+local const ct_data static_dtree[D_CODES] = { /* distance tree of STATIC_TREES blocks */
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+};
+
+const uch _dist_code[DIST_CODE_LEN] = {
+ 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+ 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+};
+
+const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { /* indexed by match length - MIN_MATCH */
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+};
+
+local const int base_length[LENGTH_CODES] = { /* per length code: value subtracted from lc before its extra bits */
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
+};
+
+local const int base_dist[D_CODES] = { /* per distance code: value subtracted from dist before its extra bits */
+    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+   32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
+ 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+};
+
diff --git a/ZLIB/uncompr.c b/ZLIB/uncompr.c
new file mode 100644
index 0000000..39f15b4
--- /dev/null
+++ b/ZLIB/uncompr.c
@@ -0,0 +1,58 @@
+/* uncompr.c -- decompress a memory buffer
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: uncompr.c,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#include "zlib.h"
+
+/* ===========================================================================
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon successful exit, destLen is the actual size of the uncompressed data.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
+   Z_DATA_ERROR if the input data was corrupted or needs a preset dictionary.
+*/
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    z_stream stream;
+    int err;
+
+    stream.next_in = (Bytef*)source;
+    stream.avail_in = (uInt)sourceLen;
+    /* Reject lengths that do not fit in uInt (e.g. > 64K on 16-bit machines): */
+    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
+
+    stream.next_out = dest;
+    stream.avail_out = (uInt)*destLen;
+    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+
+    stream.zalloc = (alloc_func)0; /* null zalloc/zfree make inflateInit */
+    stream.zfree = (free_func)0;   /* use the default allocation functions */
+
+    err = inflateInit(&stream);
+    if (err != Z_OK) return err;
+
+    err = inflate(&stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        inflateEnd(&stream);
+        /* Z_NEED_DICT is positive: report it as an error, never as success */
+        if (err == Z_NEED_DICT) return Z_DATA_ERROR;
+        return err == Z_OK ? Z_BUF_ERROR : err;
+    }
+    *destLen = stream.total_out;
+    return inflateEnd(&stream);
+}
diff --git a/ZLIB/zconf.h b/ZLIB/zconf.h
new file mode 100644
index 0000000..26756ec
--- /dev/null
+++ b/ZLIB/zconf.h
@@ -0,0 +1,279 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: zconf.h,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#ifndef _ZCONF_H
+#define _ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ */
+#ifdef Z_PREFIX
+#  define deflateInit_	z_deflateInit_
+#  define deflate	z_deflate
+#  define deflateEnd	z_deflateEnd
+#  define inflateInit_ 	z_inflateInit_
+#  define inflate	z_inflate
+#  define inflateEnd	z_inflateEnd
+#  define deflateInit2_	z_deflateInit2_
+#  define deflateSetDictionary z_deflateSetDictionary
+#  define deflateCopy	z_deflateCopy
+#  define deflateReset	z_deflateReset
+#  define deflateParams	z_deflateParams
+#  define inflateInit2_	z_inflateInit2_
+#  define inflateSetDictionary z_inflateSetDictionary
+#  define inflateSync	z_inflateSync
+#  define inflateSyncPoint z_inflateSyncPoint
+#  define inflateReset	z_inflateReset
+#  define compress	z_compress
+#  define compress2	z_compress2
+#  define uncompress	z_uncompress
+#  define adler32	z_adler32
+#  define crc32		z_crc32
+#  define get_crc_table z_get_crc_table
+
+#  define Byte		z_Byte
+#  define uInt		z_uInt
+#  define uLong		z_uLong
+#  define Bytef	        z_Bytef
+#  define charf		z_charf
+#  define intf		z_intf
+#  define uIntf		z_uIntf
+#  define uLongf	z_uLongf
+#  define voidpf	z_voidpf
+#  define voidp		z_voidp
+#endif
+
+#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
+#  define WIN32
+#endif
+#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
+#  ifndef __32BIT__
+#    define __32BIT__
+#  endif
+#endif
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#if defined(MSDOS) && !defined(__32BIT__)
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#if (defined(MSDOS) || defined(_WINDOWS) || defined(WIN32))  && !defined(STDC)
+#  define STDC
+#endif
+#if defined(__STDC__) || defined(__cplusplus) || defined(__OS2__)
+#  ifndef STDC
+#    define STDC
+#  endif
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const
+#  endif
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__) || defined(applec) ||defined(THINK_C) ||defined(__SC__)
+#  define NO_DUMMY_DECL
+#endif
+
+/* Old Borland C incorrectly complains about missing returns: */
+#if defined(__BORLANDC__) && (__BORLANDC__ < 0x500)
+#  define NEED_DUMMY_RETURN
+#endif
+
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#if (defined(M_I86SM) || defined(M_I86MM)) && !defined(__32BIT__)
+   /* MSC small or medium model */
+#  define SMALL_MEDIUM
+#  ifdef _MSC_VER
+#    define FAR _far
+#  else
+#    define FAR far
+#  endif
+#endif
+#if defined(__BORLANDC__) && (defined(__SMALL__) || defined(__MEDIUM__))
+#  ifndef __32BIT__
+#    define SMALL_MEDIUM
+#    define FAR _far
+#  endif
+#endif
+
+/* Compile with -DZLIB_DLL for Windows DLL support */
+#if defined(ZLIB_DLL)
+#  if defined(_WINDOWS) || defined(WINDOWS)
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+#    define ZEXPORT  WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA  WINAPIV
+#    else
+#      define ZEXPORTVA  FAR _cdecl _export
+#    endif
+#  endif
+#  if defined (__BORLANDC__)
+#    if (__BORLANDC__ >= 0x0500) && defined (WIN32)
+#      include <windows.h>
+#      define ZEXPORT __declspec(dllexport) WINAPI
+#      define ZEXPORTRVA __declspec(dllexport) WINAPIV
+#    else
+#      if defined (_Windows) && defined (__DLL__)
+#        define ZEXPORT _export
+#        define ZEXPORTVA _export
+#      endif
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  if defined (ZLIB_DLL)
+#    define ZEXTERN extern __declspec(dllexport)
+#  else
+#    define ZEXTERN extern __declspec(dllimport)
+#  endif
+#endif
+
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+
+#ifndef FAR
+#   define FAR
+#endif
+
+#if !defined(MACOS) && !defined(TARGET_OS_MAC)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif /* NOTE(review): on Mac targets Byte presumably comes from system headers; confirm */
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif /* SMALL_MEDIUM */
+typedef char  FAR charf;  /* FAR-qualified variants of the basic types; FAR */
+typedef int   FAR intf;   /* expands to nothing unless a mixed memory model */
+typedef uInt  FAR uIntf;  /* defined it above */
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void FAR *voidpf; /* generic pointers: void-based when STDC is defined */
+   typedef void     *voidp;
+#else
+   typedef Byte FAR *voidpf; /* otherwise Byte-based */
+   typedef Byte     *voidp;
+#endif /* STDC */
+
+#ifdef HAVE_UNISTD_H
+#  include <sys/types.h> /* for off_t */
+#  include <unistd.h>    /* for SEEK_* and off_t */
+#  define z_off_t  off_t
+#endif
+#ifndef SEEK_SET
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+#ifndef z_off_t
+#  define  z_off_t long
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+#   pragma map(deflateInit_,"DEIN")
+#   pragma map(deflateInit2_,"DEIN2")
+#   pragma map(deflateEnd,"DEEND")
+#   pragma map(inflateInit_,"ININ")
+#   pragma map(inflateInit2_,"ININ2")
+#   pragma map(inflateEnd,"INEND")
+#   pragma map(inflateSync,"INSY")
+#   pragma map(inflateSetDictionary,"INSEDI")
+#   pragma map(inflate_blocks,"INBL")
+#   pragma map(inflate_blocks_new,"INBLNE")
+#   pragma map(inflate_blocks_free,"INBLFR")
+#   pragma map(inflate_blocks_reset,"INBLRE")
+#   pragma map(inflate_codes_free,"INCOFR")
+#   pragma map(inflate_codes,"INCO")
+#   pragma map(inflate_fast,"INFA")
+#   pragma map(inflate_flush,"INFLU")
+#   pragma map(inflate_mask,"INMA")
+#   pragma map(inflate_set_dictionary,"INSEDI2")
+#   pragma map(inflate_copyright,"INCOPY")
+#   pragma map(inflate_trees_bits,"INTRBI")
+#   pragma map(inflate_trees_dynamic,"INTRDY")
+#   pragma map(inflate_trees_fixed,"INTRFI")
+#   pragma map(inflate_trees_free,"INTRFR")
+#endif
+
+#endif /* _ZCONF_H */
diff --git a/ZLIB/zlib.h b/ZLIB/zlib.h
new file mode 100644
index 0000000..52cb529
--- /dev/null
+++ b/ZLIB/zlib.h
@@ -0,0 +1,893 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.1.4, March 11th, 2002
+
+  Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
+  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+*/
+
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.1.4"
+
+/* 
+     The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed
+  data.  This version of the library supports only one compression method
+  (deflation) but other algorithms will be added later and will have the same
+  stream interface.
+
+     Compression can be done in a single step if the buffers are large
+  enough (for example if an input file is mmap'ed), or can be done by
+  repeated calls of the compression function.  In the latter case, the
+  application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+     The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio.
+
+     The library does not install any signal handler. The decoder checks
+  the consistency of the compressed data, so the library should never
+  crash even in case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s { /* stream state handed to the deflate/inflate functions */
+    Bytef    *next_in;  /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte should be put there */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    char     *msg;      /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: ascii or binary */
+    uLong   adler;      /* adler32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream; /* zalloc, zfree and opaque must be set by the caller before init */
+
+typedef z_stream FAR *z_streamp;
+
+/*
+   The application must update next_in and avail_in when avail_in has
+   dropped to zero. It must update next_out and avail_out when avail_out
+   has dropped to zero. The application must initialize zalloc, zfree and
+   opaque before calling the init function. All other fields are set by the
+   compression library and must not be updated by the application.
+
+   The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree. This can be useful for custom
+   memory management. The compression library attaches no meaning to the
+   opaque value.
+
+   zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.
+
+   On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this
+   if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
+   pointers returned by zalloc for objects of exactly 65536 bytes *must*
+   have their offset normalized to zero. The default allocation function
+   provided by this library ensures this (see zutil.c). To reduce memory
+   requirements and avoid any allocation of 64K objects, at the expense of
+   compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
+
+   The fields total_in and total_out can be used for statistics or
+   progress reports. After compression, total_in holds the total size of
+   the uncompressed data and may be saved for use in the decompressor
+   (particularly if the decompressor wants to decompress everything in
+   a single step).
+*/
+
+                        /* constants */
+
+#define Z_NO_FLUSH      0
+#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
+#define Z_SYNC_FLUSH    2
+#define Z_FULL_FLUSH    3
+#define Z_FINISH        4
+/* Allowed flush values; see deflate() below for details */
+
+#define Z_OK            0
+#define Z_STREAM_END    1
+#define Z_NEED_DICT     2
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR   (-3)
+#define Z_MEM_ERROR    (-4)
+#define Z_BUF_ERROR    (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative
+ * values are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1)
+/* compression levels */
+
+#define Z_FILTERED            1
+#define Z_HUFFMAN_ONLY        2
+#define Z_DEFAULT_STRATEGY    0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY   0
+#define Z_ASCII    1
+#define Z_UNKNOWN  2
+/* Possible values of the data_type field */
+
+#define Z_DEFLATED   8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is
+   not compatible with the zlib.h header file used by the application.
+   This check is automatically made by deflateInit and inflateInit.
+ */
+
+/* 
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+     Initializes the internal stream state for compression. The fields
+   zalloc, zfree and opaque must be initialized before by the caller.
+   If zalloc and zfree are set to Z_NULL, deflateInit updates them to
+   use default allocation functions.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at
+   all (the input data is simply copied a block at a time).
+   Z_DEFAULT_COMPRESSION requests a default compromise between speed and
+   compression (currently equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if level is not a valid compression level,
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).
+   msg is set to null if there is no error message.  deflateInit does not
+   perform any compression: this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may introduce some
+  output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows. deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly. This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary (in interactive applications).
+    Some output may be provided even if flush is not set.
+
+  Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating avail_in or avail_out accordingly; avail_out
+  should never be zero before the call. The application can consume the
+  compressed output when it wants, for example when the output buffer is full
+  (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
+  and with zero avail_out, it must be called again after making room in the
+  output buffer because there might be more output pending.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far. (In particular
+  avail_in is zero after the call if enough output space has been provided
+  before the call.)  Flushing may degrade compression for some compression
+  algorithms and so it should be used only when necessary.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+  the compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out).
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there
+  was enough output space; if deflate returns with Z_OK, this function must be
+  called again with Z_FINISH and more output space (updated avail_out) but no
+  more input data, until it returns with Z_STREAM_END or an error. After
+  deflate has returned Z_STREAM_END, the only possible operations on the
+  stream are deflateReset or deflateEnd.
+  
+    Z_FINISH can be used immediately after deflateInit if all the compression
+  is to be done in a single step. In this case, avail_out must be at least
+  0.1% larger than avail_in plus 12 bytes.  If deflate does not return
+  Z_STREAM_END, then it must be called again as described above.
+
+    deflate() sets strm->adler to the adler32 checksum of all input read
+  so far (that is, total_in bytes).
+
+    deflate() may update data_type if it can make a good guess about
+  the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered
+  binary. This field is only for information purposes and does not affect
+  the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
+  (for example avail_in or avail_out was zero).
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded). In the error case,
+   msg may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/* 
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+     Initializes the internal stream state for decompression. The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
+   value depends on the compression method), inflateInit determines the
+   compression method from the zlib header and allocates all data structures
+   accordingly; otherwise the allocation will be deferred to the first call of
+   inflate.  If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+   use default allocation functions.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller.  msg is set to null if there is no error
+   message. inflateInit does not perform any decompression apart from reading
+   the zlib header if present: this will be done by inflate().  (So next_in and
+   avail_in may be modified, but next_out and avail_out are unchanged.)
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may
+  introduce some output latency (reading input without producing any output)
+  except when forced to flush.
+
+  The detailed semantics are as follows. inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in is updated and processing
+    will resume at this point for the next call of inflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there
+    is no more input data or no more space in the output buffer (see below
+    about the flush parameter).
+
+  Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating the next_* and avail_* values accordingly.
+  The application can consume the uncompressed output when it wants, for
+  example when the output buffer is full (avail_out == 0), or after each
+  call of inflate(). If inflate returns Z_OK and with zero avail_out, it
+  must be called again after making room in the output buffer because there
+  might be more output pending.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much
+  output as possible to the output buffer. The flushing behavior of inflate is
+  not specified for values of the flush parameter other than Z_SYNC_FLUSH
+  and Z_FINISH, but the current implementation actually flushes as much output
+  as possible anyway.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error. However if all decompression is to be performed in a single step
+  (a single call of inflate), the parameter flush should be set to
+  Z_FINISH. In this case all pending input is processed and all pending
+  output is flushed; avail_out must be large enough to hold all the
+  uncompressed data. (The size of the uncompressed data may have been saved
+  by the compressor for this purpose.) The next operation on this stream must
+  be inflateEnd to deallocate the decompression state. The use of Z_FINISH
+  is never required, but can be used to inform inflate that a faster routine
+  may be used for the single inflate() call.
+
+     If a preset dictionary is needed at this point (see inflateSetDictionary
+  below), inflate sets strm->adler to the adler32 checksum of the
+  dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise 
+  it sets strm->adler to the adler32 checksum of all output produced
+  so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or
+  an error code as described below. At the end of the stream, inflate()
+  checks that its computed adler32 checksum is equal to that saved by the
+  compressor and returns Z_STREAM_END only if the checksum is correct.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect
+  adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent
+  (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not
+  enough memory, Z_BUF_ERROR if no progress is possible or if there was not
+  enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR
+  case, the application may then call inflateSync to look for a good
+  compression block.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+   was inconsistent. In the error case, msg may be set but then points to a
+   static string (which must not be deallocated).
+*/
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*   
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+                                     int  level,
+                                     int  method,
+                                     int  windowBits,
+                                     int  memLevel,
+                                     int  strategy));
+
+     This is another version of deflateInit with more compression options. The
+   fields next_in, zalloc, zfree and opaque must be initialized before by
+   the caller.
+
+     The method parameter is the compression method. It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer).  It should be in the range 8..15 for this
+   version of the library. Larger values of this parameter result in better
+   compression at the expense of memory usage. The default value is 15 if
+   deflateInit is used instead.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state. memLevel=1 uses minimum memory but
+   is slow and reduces compression ratio; memLevel=9 uses maximum memory
+   for optimal speed. The default value is 8. See zconf.h for total memory
+   usage as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm. Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match).  Filtered data consists mostly of small values with a
+   somewhat random distribution. In this case, the compression algorithm is
+   tuned to compress them better. The effect of Z_FILTERED is to force more
+   Huffman coding and less string matching; it is somewhat intermediate
+   between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. The strategy parameter only
+   affects the compression ratio but not the correctness of the compressed
+   output even if it is not set appropriately.
+
+      deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
+   method). msg is set to null if there is no error message.  deflateInit2 does
+   not perform any compression: this will be done by deflate().
+*/
+                            
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output. This function must be called
+   immediately after deflateInit, deflateInit2 or deflateReset, before any
+   call of deflate. The compressor and decompressor must use exactly the same
+   dictionary (see inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary. Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size in
+   deflateInit or deflateInit2. Thus the strings most likely to be useful
+   should be put at the end of the dictionary, not at the front.
+
+     Upon return of this function, strm->adler is set to the Adler32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor. (The Adler32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.)
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if the compression method is bsort). deflateSetDictionary does not
+   perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter. The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and
+   can consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being NULL). msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to deflateEnd followed by deflateInit,
+   but does not free and reallocate all the internal compression state.
+   The stream will keep the same compression level and any other attributes
+   that may have been set by deflateInit2.
+
+      deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+				      int level,
+				      int strategy));
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2.  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different
+   strategy. If the compression level is changed, the input available so far
+   is compressed with the old level (and may be flushed); the new level will
+   take effect only at the next call of deflate().
+
+     Before the call of deflateParams, the stream state must be set as for
+   a call of deflate(), since the currently available input may have to
+   be compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
+   if strm->avail_out was zero.
+*/
+
+/*   
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+                                     int  windowBits));
+
+     This is another version of inflateInit with an extra parameter. The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library. The default value is 15 if inflateInit is used
+   instead. If a compressed stream with a larger window size is given as
+   input, inflate() will return with the error code Z_DATA_ERROR instead of
+   trying to allocate a larger window.
+
+      inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as an out-of-range
+   windowBits). msg is set to null if there is no error message.  inflateInit2
+   does not perform any decompression apart from reading the zlib header if
+   present: this will be done by inflate(). (So next_in and avail_in may be
+   modified, but next_out and avail_out are unchanged.)
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence. This function must be called immediately after a call of inflate
+   if this call returned Z_NEED_DICT. The dictionary chosen by the compressor
+   can be determined from the Adler32 value returned by this call of
+   inflate. The compressor and decompressor must use exactly the same
+   dictionary (see deflateSetDictionary).
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect Adler32 value). inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/* 
+    Skips invalid compressed data until a full flush point (see above the
+  description of deflate with Z_FULL_FLUSH) can be found, or until all
+  available input is skipped. No output is provided.
+
+    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
+  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
+  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
+  case, the application may save the current value of total_in which
+  indicates where valid compressed data was found. In the error case, the
+  application may repeatedly call inflateSync, providing more input each time,
+  until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate all the internal decompression state.
+   The stream will keep attributes that may have been set by inflateInit2.
+
+      inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the
+   basic stream-oriented functions. To simplify the interface, some
+   default options are assumed (compression level and memory usage,
+   standard memory allocation functions). The source code of these
+   utility functions can easily be modified if you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be at least 0.1% larger than
+   sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
+   compressed buffer.
+     This function can be used to compress a whole file at once if the
+   input file is mmap'ed.
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+/*
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least 0.1% larger than sourceLen plus
+   12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon exit, destLen is the actual size of the uncompressed buffer.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
+
+
+typedef voidp gzFile;
+
+ZEXTERN gzFile ZEXPORT gzopen  OF((const char *path, const char *mode));
+/*
+     Opens a gzip (.gz) file for reading or writing. The mode parameter
+   is as in fopen ("rb" or "wb") but can also include a compression level
+   ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
+   Huffman only compression as in "wb1h". (See the description
+   of deflateInit2 for more information about the strategy parameter.)
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.
+
+     gzopen returns NULL if the file could not be opened or if there was
+   insufficient memory to allocate the (de)compression state; errno
+   can be checked to distinguish the two cases (if errno is zero, the
+   zlib error is Z_MEM_ERROR).  */
+
+ZEXTERN gzFile ZEXPORT gzdopen  OF((int fd, const char *mode));
+/*
+     gzdopen() associates a gzFile with the file descriptor fd.  File
+   descriptors are obtained from calls like open, dup, creat, pipe or
+   fileno (if the file has been previously opened with fopen).
+   The mode parameter is as in gzopen.
+     The next call of gzclose on the returned gzFile will also close the
+   file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
+   descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+     gzdopen returns NULL if there was insufficient memory to allocate
+   the (de)compression state.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+     Dynamically update the compression level or strategy. See the description
+   of deflateInit2 for the meaning of these parameters.
+     gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
+   opened for writing.
+*/
+
+ZEXTERN int ZEXPORT    gzread  OF((gzFile file, voidp buf, unsigned len));
+/*
+     Reads the given number of uncompressed bytes from the compressed file.
+   If the input file was not in gzip format, gzread copies the given number
+   of bytes into the buffer.
+     gzread returns the number of uncompressed bytes actually read (0 for
+   end of file, -1 for error). */
+
+ZEXTERN int ZEXPORT    gzwrite OF((gzFile file, 
+				   const voidp buf, unsigned len));
+/*
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of uncompressed bytes actually written
+   (0 in case of error).
+*/
+
+ZEXTERN int ZEXPORTVA   gzprintf OF((gzFile file, const char *format, ...));
+/*
+     Converts, formats, and writes the args to the compressed file under
+   control of the format string, as in fprintf. gzprintf returns the number of
+   uncompressed bytes actually written (0 in case of error).
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+      Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+      gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+      Reads bytes from the compressed file until len-1 characters are read, or
+   a newline character is read and transferred to buf, or an end-of-file
+   condition is encountered.  The string is then terminated with a null
+   character.
+      gzgets returns buf, or Z_NULL in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzputc OF((gzFile file, int c));
+/*
+      Writes c, converted to an unsigned char, into the compressed file.
+   gzputc returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzgetc OF((gzFile file));
+/*
+      Reads one byte from the compressed file. gzgetc returns this byte
+   or -1 in case of end of file or error.
+*/
+
+ZEXTERN int ZEXPORT    gzflush OF((gzFile file, int flush));
+/*
+     Flushes all pending output into the compressed file. The parameter
+   flush is as in the deflate() function. The return value is the zlib
+   error number (see function gzerror below). gzflush returns Z_OK if
+   the flush parameter is Z_FINISH and all output could be flushed.
+     gzflush should be called only when strictly necessary because it can
+   degrade compression.
+*/
+
+ZEXTERN z_off_t ZEXPORT    gzseek OF((gzFile file,
+				      z_off_t offset, int whence));
+/* 
+      Sets the starting position for the next gzread or gzwrite on the
+   given compressed file. The offset represents a number of bytes in the
+   uncompressed data stream. The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow. If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+      gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+/*
+     Rewinds the given file. This function is supported only for reading.
+
+   gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+/*
+     Returns the starting position for the next gzread or gzwrite on the
+   given compressed file. This position represents a number of bytes in the
+   uncompressed data stream.
+
+   gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+     Returns 1 when EOF has previously been detected reading the given
+   input stream, otherwise zero.
+*/
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+/*
+     Flushes all pending output if necessary, closes the compressed file
+   and deallocates all the (de)compression state. The return value is the zlib
+   error number (see function gzerror below).
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+     Returns the error message for the last error which occurred on the
+   given compressed file. errnum is set to zlib error number. If an
+   error occurred in the file system and not in the compression library,
+   errnum is set to Z_ERRNO and the application may consult errno
+   to get the exact error code.
+*/
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the
+   compression library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum. If buf is NULL, this function returns
+   the required initial value for the checksum.
+   An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+   much faster. Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+/*
+     Update a running crc with the bytes buf[0..len-1] and return the updated
+   crc. If buf is NULL, this function returns the required initial value
+   for the crc. Pre- and post-conditioning (one's complement) is performed
+   within this function so it shouldn't be done by the application.
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+#define deflateInit(strm, level) \
+        deflateInit_((strm), (level),       ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit(strm) \
+        inflateInit_((strm),                ZLIB_VERSION, sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+        deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                      (strategy),           ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+        inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
+
+
+#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL)
+    struct internal_state {int dummy;}; /* hack for buggy compilers */
+#endif
+
+ZEXTERN const char   * ZEXPORT zError           OF((int err));
+ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp z));
+ZEXTERN const uLongf * ZEXPORT get_crc_table    OF((void));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZLIB_H */
diff --git a/ZLIB/zutil.c b/ZLIB/zutil.c
new file mode 100644
index 0000000..0a163df
--- /dev/null
+++ b/ZLIB/zutil.c
@@ -0,0 +1,225 @@
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h 
+ */
+
+/* @(#) $Id: zutil.c,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#include "zutil.h"
+
+struct internal_state      {int dummy;}; /* for buggy compilers */
+
+#ifndef STDC
+extern void exit OF((int));
+#endif
+
+const char *z_errmsg[10] = { /* message for code c sits at index Z_NEED_DICT-c (see ERR_MSG) */
+"need dictionary",     /* Z_NEED_DICT       2  */
+"stream end",          /* Z_STREAM_END      1  */
+"",                    /* Z_OK              0  */
+"file error",          /* Z_ERRNO         (-1) */
+"stream error",        /* Z_STREAM_ERROR  (-2) */
+"data error",          /* Z_DATA_ERROR    (-3) */
+"insufficient memory", /* Z_MEM_ERROR     (-4) */
+"buffer error",        /* Z_BUF_ERROR     (-5) */
+"incompatible version",/* Z_VERSION_ERROR (-6) */
+""};                   /* index 9: spare entry, none of the codes listed above maps to it */
+
+
+const char * ZEXPORT zlibVersion()
+{   /* Returns the ZLIB_VERSION constant this translation unit was built with. */
+    return ZLIB_VERSION;
+}
+
+#ifdef DEBUG
+
+#  ifndef verbose
+#    define verbose 0
+#  endif
+int z_verbose = verbose;
+
+void z_error (m)
+    char *m;   /* message text; printed followed by a newline */
+{   /* DEBUG-only fatal error reporter (this definition sits inside #ifdef DEBUG). */
+    fprintf(stderr, "%s\n", m);
+    exit(1);   /* terminates the process with status 1; does not return */
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ */
+const char * ZEXPORT zError(err)
+    int err;   /* zlib return code to translate */
+{   /* Looks the message up in z_errmsg through the ERR_MSG macro from zutil.h. */
+    return ERR_MSG(err);
+}
+
+
+#ifndef HAVE_MEMCPY
+
+void zmemcpy(dest, source, len)   /* fallback byte copy, compiled only without HAVE_MEMCPY */
+    Bytef* dest;                  /* destination, filled from its first byte upward */
+    const Bytef* source;          /* bytes to copy */
+    uInt  len;                    /* number of bytes to copy */
+{
+    if (len == 0) return;         /* the do-while below always executes at least once */
+    do {
+        *dest++ = *source++; /* ??? to be unrolled */
+    } while (--len != 0);
+}
+
+int zmemcmp(s1, s2, len)   /* fallback byte compare, compiled only without HAVE_MEMCPY */
+    const Bytef* s1;       /* first buffer */
+    const Bytef* s2;       /* second buffer */
+    uInt  len;             /* number of bytes to compare */
+{
+    uInt j;                /* index of the byte pair being compared */
+
+    for (j = 0; j < len; j++) {
+        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; /* +1 if s1's byte is larger, else -1 */
+    }
+    return 0;              /* all len bytes matched (also the result for len == 0) */
+}
+
+void zmemzero(dest, len)   /* fallback zero-fill, compiled only without HAVE_MEMCPY */
+    Bytef* dest;           /* buffer to clear */
+    uInt  len;             /* number of bytes to set to 0 */
+{
+    if (len == 0) return;  /* the do-while below always executes at least once */
+    do {
+        *dest++ = 0;  /* ??? to be unrolled */
+    } while (--len != 0);
+}
+#endif
+
+#ifdef __TURBOC__
+#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__)
+/* Small and medium model in Turbo C are for now limited to near allocation
+ * with reduced MAX_WBITS and MAX_MEM_LEVEL
+ */
+#  define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0;
+
+typedef struct ptr_table_s {
+    voidpf org_ptr;
+    voidpf new_ptr;
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{   /* Turbo C allocator: returns a block of items*size bytes, or NULL on failure. */
+    voidpf buf = opaque; /* just to make some compilers happy */
+    ulg bsize = (ulg)items*size; /* items is widened to ulg before the multiply */
+
+    /* If we allocate less than 65520 bytes, we assume that farmalloc
+     * will return a usable pointer which doesn't have to be normalized.
+     */
+    if (bsize < 65520L) {
+        buf = farmalloc(bsize);
+        if (*(ush*)&buf != 0) return buf; /* first ush of the pointer non-zero: hand it back untouched */
+    } else {
+        buf = farmalloc(bsize + 16L); /* presumably the 16 spare bytes cover the round-up below */
+    }
+    if (buf == NULL || next_ptr >= MAX_PTR) return NULL; /* NOTE(review): a non-NULL buf is not farfree'd when the table is full */
+    table[next_ptr].org_ptr = buf; /* the pointer zcfree() must later pass to farfree() */
+
+    /* Normalize the pointer to seg:0 */
+    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; /* bump second ush by the first ush rounded up to 16-byte units */
+    *(ush*)&buf = 0; /* then clear the first ush */
+    table[next_ptr++].new_ptr = buf; /* the value callers hold; zcfree() searches for it */
+    return buf;
+}
+
+void  zcfree (voidpf opaque, voidpf ptr)
+{   /* Turbo C counterpart of zcalloc(): releases ptr, undoing any normalization. */
+    int n; /* index into table[] */
+    if (*(ush*)&ptr != 0) { /* object < 64K */
+        farfree(ptr); /* pointer was returned unmodified by zcalloc() */
+        return;
+    }
+    /* Find the original pointer */
+    for (n = 0; n < next_ptr; n++) {
+        if (ptr != table[n].new_ptr) continue; /* not this entry, keep scanning */
+
+        farfree(table[n].org_ptr); /* free with the pointer farmalloc() actually returned */
+        while (++n < next_ptr) { /* shift later entries down over the removed slot */
+            table[n-1] = table[n];
+        }
+        next_ptr--;
+        return;
+    }
+    ptr = opaque; /* just to make some compilers happy */
+    Assert(0, "zcfree: ptr not found"); /* Assert expands to nothing unless DEBUG is defined */
+}
+#endif
+#endif /* __TURBOC__ */
+
+
+#if defined(M_I86) && !defined(__32BIT__)
+/* Microsoft C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
+#  define _halloc  halloc
+#  define _hfree   hfree
+#endif
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
+{   /* 16-bit Microsoft C allocator: the request is forwarded to _halloc. */
+    if (opaque) opaque = 0; /* to make compiler happy */
+    return _halloc((long)items, size); /* items is passed as long; size is passed through */
+}
+
+void  zcfree (voidpf opaque, voidpf ptr)
+{   /* 16-bit Microsoft C counterpart of zcalloc(): ptr is released with _hfree. */
+    if (opaque) opaque = 0; /* to make compiler happy */
+    _hfree(ptr);
+}
+
+#endif /* MSC */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp  calloc OF((uInt items, uInt size));
+extern void   free   OF((voidpf ptr));
+#endif
+
+voidpf zcalloc (opaque, items, size)   /* default allocator, used when MY_ZCALLOC is not defined */
+    voidpf opaque;                     /* not needed here; only referenced to avoid a warning */
+    unsigned items;                    /* element count, passed to calloc */
+    unsigned size;                     /* element size, passed to calloc */
+{
+    if (opaque) items += size - size; /* make compiler happy */
+    return (voidpf)calloc(items, size); /* result of calloc is returned unchanged */
+}
+
+void  zcfree (opaque, ptr)   /* default deallocator, used when MY_ZCALLOC is not defined */
+    voidpf opaque;           /* not needed here; only tested below to avoid a warning */
+    voidpf ptr;              /* block to release, handed straight to free() */
+{
+    free(ptr);
+    if (opaque) return; /* make compiler happy */
+}
+
+#endif /* MY_ZCALLOC */
diff --git a/ZLIB/zutil.h b/ZLIB/zutil.h
new file mode 100644
index 0000000..1bf5bc6
--- /dev/null
+++ b/ZLIB/zutil.h
@@ -0,0 +1,220 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2002 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id: zutil.h,v 1.1 2014/03/04 21:20:44 uid42406 Exp $ */
+
+#ifndef _Z_UTIL_H
+#define _Z_UTIL_H
+
+#include "zlib.h"
+
+#ifdef STDC
+#  include <stddef.h>
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+#ifdef NO_ERRNO_H
+    extern int errno;
+#else
+#   include <errno.h>
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+typedef unsigned char  uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long  ulg;
+
+extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[((err) < Z_VERSION_ERROR || (err) > Z_NEED_DICT) ? 9 : Z_NEED_DICT-(err)] /* unknown codes -> "" at index 9, never outside the table */
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = (char*)ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH  3
+#define MAX_MATCH  258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#ifdef MSDOS
+#  define OS_CODE  0x00
+#  if defined(__TURBOC__) || defined(__BORLANDC__)
+#    if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+       /* Allow compilation with ANSI keywords only enabled */
+       void _Cdecl farfree( void *block );
+       void *_Cdecl farmalloc( unsigned long nbytes );
+#    else
+#     include <alloc.h>
+#    endif
+#  else /* MSC or DJGPP */
+#    include <malloc.h>
+#  endif
+#endif
+
+#ifdef OS2
+#  define OS_CODE  0x06
+#endif
+
+#ifdef WIN32 /* Windows 95 & Windows NT */
+#  define OS_CODE  0x0b
+#endif
+
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  0x02
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#ifdef AMIGA
+#  define OS_CODE  0x01
+#endif
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  0x05
+#endif
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+#  define OS_CODE  0x07
+#  if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+#    include <unix.h> /* for fdopen */
+#  else
+#    ifndef fdopen
+#      define fdopen(fd,mode) NULL /* No fdopen() */
+#    endif
+#  endif
+#endif
+
+#ifdef __50SERIES /* Prime/PRIMOS */
+#  define OS_CODE  0x0F
+#endif
+
+#ifdef TOPS20
+#  define OS_CODE  0x0a
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+#  define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600))
+#  define fdopen(fd,type)  _fdopen(fd,type)
+#endif
+
+
+        /* Common defaults */
+
+#ifndef OS_CODE
+#  define OS_CODE  0x03  /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+         /* functions */
+
+#ifdef HAVE_STRERROR
+   extern char *strerror OF((int));
+#  define zstrerror(errnum) strerror(errnum)
+#else
+#  define zstrerror(errnum) ""
+#endif
+
+#if defined(pyr)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else
+   extern void zmemcpy  OF((Bytef* dest, const Bytef* source, uInt len));
+   extern int  zmemcmp  OF((const Bytef* s1, const Bytef* s2, uInt len));
+   extern void zmemzero OF((Bytef* dest, uInt len));
+#endif
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  include <stdio.h>
+   extern int z_verbose;
+   extern void z_error    OF((char *m));
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+
+typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf,
+				       uInt len));
+voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
+void   zcfree  OF((voidpf opaque, voidpf ptr));
+
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+#endif /* _Z_UTIL_H */
diff --git a/test.bat b/test.bat
deleted file mode 100644
index 12ad4a6..0000000
--- a/test.bat
+++ /dev/null
@@ -1,5 +0,0 @@
-\Research\PoissonRecon\PoissonRecon\Bin\x64\Release\PoissonRecon.exe --in \data\PointSets\eagle_cleaned.ply --color 16 --depth 10 --out eagle.d.ply --density --voxel eagel.d.iso --voxelDepth 8 --dirichlet
-\Research\PoissonRecon\PoissonRecon\Bin\x64\Release\PoissonRecon.exe --in \data\PointSets\eagle_cleaned.ply --color 16 --depth 10 --out eagle.n.ply --density --voxel eagel.n.iso --voxelDepth 8
-\Research\PoissonRecon\PoissonRecon\Bin\x64\Release\SurfaceTrimmer.exe --in eagle.d.ply --out eagle.d.trim.ply --trim 6
-\Research\PoissonRecon\PoissonRecon\Bin\x64\Release\SurfaceTrimmer.exe --in eagle.n.ply --out eagle.n.trim.ply --trim 6
-