Skip to content

Add long path support wrt #39 #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion file-io.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ flag os-string
default: False
manual: False

flag long-paths
description: Enable a hack for ad-hoc long path support on Windows
default: True
manual: True

library
default-language: Haskell2010

Expand All @@ -51,6 +56,9 @@ library
else
build-depends: filepath >= 1.4.100.0 && < 1.5.0.0

if flag(long-paths)
cpp-options: -DLONG_PATHS

exposed-modules:
System.File.OsPath
System.File.OsPath.Internal
Expand Down Expand Up @@ -111,6 +119,8 @@ test-suite Properties
main-is: Properties.hs
type: exitcode-stdio-1.0
default-language: Haskell2010
build-depends: base >=4.13.0.0 && <5, bytestring, tasty, tasty-hunit, file-io, filepath, temporary
build-depends: base >=4.13.0.0 && <5, bytestring, directory, tasty, tasty-hunit, file-io, filepath, temporary
ghc-options: -Wall -threaded -rtsopts "-with-rtsopts=-N10"
if flag(long-paths)
cpp-options: -DLONG_PATHS

34 changes: 33 additions & 1 deletion tests/Properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,21 @@ import GHC.IO.Exception (IOErrorType(..), IOException(..))
import System.IO
import System.IO.Temp
import qualified Data.ByteString as BS
#if defined(LONG_PATHS)
import Control.Monad (when)
import System.Directory.OsPath (createDirectory)
import System.IO.Error (catchIOError)
#endif


main :: IO ()
main = defaultMain $ testGroup "All"
[ testGroup "System.File.OsPath"
[ testCase "readFile . writeFile" writeFileReadFile
[
#if defined(LONG_PATHS)
testCase "writeFile (very long path)" writeFileLongPath,
#endif
testCase "readFile . writeFile" writeFileReadFile
, testCase "readFile . writeFile . writeFile" writeWriteFileReadFile
, testCase "readFile . appendFile . writeFile" appendFileReadFile
, testCase "iomode: ReadFile does not allow write" iomodeReadFile
Expand Down Expand Up @@ -56,6 +65,29 @@ main = defaultMain $ testGroup "All"
]
]

#if defined(LONG_PATHS)
writeFileLongPath :: Assertion
writeFileLongPath = do
withSystemTempDirectory "test" $ \baseDir' -> do
baseDir <- OSP.encodeFS baseDir'
let longName = mconcat (replicate 10 [osp|its_very_long|])
let longDir = baseDir </> longName </> longName

supportsLongPaths <- do
-- create 2 dirs because 1 path segment by itself can't exceed MAX_PATH
-- tests: [createDirectory]
createDirectory (baseDir </> longName)
createDirectory longDir
return True
`catchIOError` \ _ ->
return False

when supportsLongPaths $ do
OSP.writeFile (longDir </> [osp|foo|]) "test"
contents <- OSP.readFile (longDir </> [osp|foo|])
"test" @=? contents
#endif

writeFileReadFile :: Assertion
writeFileReadFile = do
withSystemTempDirectory "test" $ \baseDir' -> do
Expand Down
3 changes: 1 addition & 2 deletions tests/T15Win.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import qualified System.File.PlatformPath as PFP
import System.IO
import System.IO.Temp

import Control.Exception (bracketOnError)
import Data.Bits
import System.OsPath.Windows ( WindowsPath, pstr )
import qualified System.OsPath.Windows as WS
Expand All @@ -36,7 +35,7 @@ main = withSystemTempDirectory "tar-test" $ \baseDir' -> do
]

openFile32 :: WindowsPath -> IOMode -> IO Win32.HANDLE
openFile32 fp iomode =
openFile32 fp _iomode =
WS.createFile
fp
Win32.gENERIC_READ
Expand Down
188 changes: 176 additions & 12 deletions windows/System/File/Platform.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import System.OsPath.Windows ( WindowsPath )
import qualified System.OsPath.Windows as WS
import Foreign.C.Types

import qualified System.OsString.Windows as WS hiding (decodeFS)
import System.OsString.Windows ( encodeUtf, WindowsString )
import qualified System.Win32 as Win32
import qualified System.Win32.WindowsString.File as WS
Expand Down Expand Up @@ -43,18 +42,29 @@ import Text.Printf (printf)

#if MIN_VERSION_filepath(1, 5, 0)
import System.OsString.Encoding
import "os-string" System.OsString.Internal.Types (WindowsString(..), WindowsChar(..))
import qualified "os-string" System.OsString.Data.ByteString.Short as BC
#else
import Data.Coerce (coerce)
import System.OsPath.Encoding
import "filepath" System.OsString.Internal.Types (WindowsString(..), WindowsChar(..))
import qualified "filepath" System.OsPath.Data.ByteString.Short.Word16 as BC
#endif

import System.IO.Error (modifyIOError, ioeSetFileName)
import GHC.IO.Encoding.UTF16 (mkUTF16le)
import GHC.IO.Encoding.Failure (CodingFailureMode(TransliterateCodingFailure))
import Control.Exception (displayException, Exception)

#if defined(LONG_PATHS)
import System.IO.Error (ioeSetLocation, ioeGetLocation, catchIOError)
import Data.Char (isAlpha, isAscii, toUpper)
import qualified System.Win32.WindowsString.Info as WS
#endif

-- | Open a file and return the 'Handle'.
openFile :: WindowsPath -> IOMode -> IO Handle
openFile fp iomode = bracketOnError
openFile fp' iomode = (`ioeSetWsPath` fp') `modifyIOError` do
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this function is going to have different semantics than the base one even with this PR included.

As far as I see base's Path -> Handle counterpant for Windows is defined at https://github.com/ghc/ghc/blob/fd40eaa17c6ce8716ec2eacc95beae194a935352/libraries/ghc-internal/src/GHC/Internal/IO/Windows/Handle.hsc#L853.

There still going to be extra things that base does and this package doesn't, like some sort of file access optimization, no idea what it does though: https://github.com/ghc/ghc/blob/fd40eaa17c6ce8716ec2eacc95beae194a935352/libraries/ghc-internal/src/GHC/Internal/IO/Windows/Handle.hsc#L875.

There's also going to be handling of locked files that is not done in this package as far as I see, though I haven't checked above openFile function maybe there's something somewhere. Locking is checked after aforementioned optimizations https://github.com/ghc/ghc/blob/fd40eaa17c6ce8716ec2eacc95beae194a935352/libraries/ghc-internal/src/GHC/Internal/IO/Windows/Handle.hsc#L877.

There's also some attempt at truncation if we're overwriting file https://github.com/ghc/ghc/blob/fd40eaa17c6ce8716ec2eacc95beae194a935352/libraries/ghc-internal/src/GHC/Internal/IO/Windows/Handle.hsc#L890.

All in all this would mean that file-io is not a drop-in replacement for what base does and someone may be able to observe the difference by switching to file-io just like with long paths.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All in all this would mean that file-io is not a drop-in replacement for what base does and someone may be able to observe the difference by switching to file-io just like with long paths.

Honestly, I don't feel like I want this to be a strict drop-in replacement, especially since this package uses proper Win32 functionality and there is no posix emulation layer involved. Even with the native winIO manager in GHC, there may be subtle differences.

Long path support is definitely something major, so supporting it makes sense.

For other invariants, I think we'd need to find a test that demonstrates different behavior first.

Copy link
Member Author

@hasufell hasufell Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Truncation is already tested here when writing over an existing file:

file-io/tests/Properties.hs

Lines 196 to 205 in e2b5ebc

existingFile2' :: Assertion
existingFile2' = do
withSystemTempDirectory "test" $ \baseDir' -> do
baseDir <- OSP.encodeFS baseDir'
let fp = baseDir </> [osp|foo|]
OSP.writeFile fp "test"
r <- try @IOException $ do
OSP.openExistingFile fp WriteMode >>= \h -> BS.hPut h "boo" >> hClose h
OSP.readFile (baseDir </> [osp|foo|])
Right "boo" @=? r

And it seems correct to me, as we set it to truncate here:

WriteMode -> Win32.tRUNCATE_EXISTING
AppendMode -> Win32.oPEN_EXISTING

fp <- furnishPath fp'
bracketOnError
(WS.createFile
fp
accessMode
Expand All @@ -71,7 +81,7 @@ openFile fp iomode = bracketOnError
#endif
Nothing)
Win32.closeHandle
(toHandle fp iomode)
(toHandle fp' iomode)
where
accessMode = case iomode of
ReadMode -> Win32.gENERIC_READ
Expand Down Expand Up @@ -104,7 +114,9 @@ writeShareMode =

-- | Open an existing file and return the 'Handle'.
openExistingFile :: WindowsPath -> IOMode -> IO Handle
openExistingFile fp iomode = bracketOnError
openExistingFile fp' iomode = (`ioeSetWsPath` fp') `modifyIOError` do
fp <- furnishPath fp'
bracketOnError
(WS.createFile
fp
accessMode
Expand Down Expand Up @@ -220,12 +232,12 @@ rand_string = do
return $ WS.pack $ fmap (WS.unsafeFromChar) (printf "%x-%x-%x" r1 r2 r3)

lenientDecode :: WindowsString -> String
lenientDecode ws = let utf16le' = WS.decodeWith utf16le_b ws
ucs2' = WS.decodeWith ucs2le ws
in case (utf16le', ucs2') of
(Right s, ~_) -> s
(_, Right s) -> s
(Left _, Left _) -> error "lenientDecode: failed to decode"
lenientDecode wstr = let utf16le' = WS.decodeWith utf16le_b wstr
ucs2' = WS.decodeWith ucs2le wstr
in case (utf16le', ucs2') of
(Right s, ~_) -> s
(_, Right s) -> s
(Left _, Left _) -> error "lenientDecode: failed to decode"


toHandle :: WindowsPath -> IOMode -> Win32.HANDLE -> IO Handle
Expand All @@ -248,3 +260,155 @@ any_ = coerce BC.any

#endif

ioeSetWsPath :: IOError -> WindowsPath -> IOError
ioeSetWsPath err =
ioeSetFileName err .
rightOrError .
WS.decodeWith (mkUTF16le TransliterateCodingFailure)

rightOrError :: Exception e => Either e a -> a
rightOrError (Left e) = error (displayException e)
rightOrError (Right a) = a

-- inlined stuff from directory package
furnishPath :: WindowsPath -> IO WindowsPath
Copy link

@sergv sergv Mar 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like a good idea for implementing furnishPath would be to reuse whatever base does via C function here https://github.com/ghc/ghc/blob/fd40eaa17c6ce8716ec2eacc95beae194a935352/libraries/ghc-internal/src/GHC/Internal/IO/Windows/Paths.hs.

Apart from matching semantics it would mean less copying around. The only drawback is that someone will need to check with each ghc release whether the foreign function is still named the same.

Otherwise all those conversions below to and from lists of characters somewhat undermine the cool idea of sticking with byte arrays.

#if !defined(LONG_PATHS)
furnishPath path = pure path
#else
furnishPath path =
(toExtendedLengthPath <$> rawPrependCurrentDirectory path)
`catchIOError` \ _ ->
pure path

toExtendedLengthPath :: WindowsPath -> WindowsPath
toExtendedLengthPath path =
if WS.isRelative path
then simplifiedPath
else
case WS.toChar <$> simplifiedPath' of
'\\' : '?' : '?' : '\\' : _ -> simplifiedPath
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a way to leverage short byte string's isPrefixOf here and below?

'\\' : '\\' : '?' : '\\' : _ -> simplifiedPath
'\\' : '\\' : '.' : '\\' : _ -> simplifiedPath
'\\' : '\\' : _ ->
ws "\\\\?\\UNC" <> WS.pack (drop 1 simplifiedPath')
_ -> ws "\\\\?\\" <> simplifiedPath
where simplifiedPath = simplifyWindows path
simplifiedPath' = WS.unpack simplifiedPath

rawPrependCurrentDirectory :: WindowsPath -> IO WindowsPath
rawPrependCurrentDirectory path
| WS.isRelative path =
((`ioeAddLocation` "prependCurrentDirectory") .
(`ioeSetWsPath` path)) `modifyIOError` do
getFullPathName path
| otherwise = pure path

simplifyWindows :: WindowsPath -> WindowsPath
simplifyWindows path
| path == mempty = mempty
| drive' == ws "\\\\?\\" = drive' <> subpath
| otherwise = simplifiedPath
where
simplifiedPath = WS.joinDrive drive' subpath'
(drive, subpath) = WS.splitDrive path
drive' = upperDrive (normaliseTrailingSep (normalisePathSeps drive))
subpath' = appendSep . avoidEmpty . prependSep . WS.joinPath .
stripPardirs . expandDots . skipSeps .
WS.splitDirectories $ subpath

upperDrive d = case WS.unpack d of
c : k : s
| isAlpha (WS.toChar c), WS.toChar k == ':', all WS.isPathSeparator s ->
-- unsafeFromChar is safe here since all characters are ASCII.
WS.pack (WS.unsafeFromChar (toUpper (WS.toChar c)) : WS.unsafeFromChar ':' : s)
_ -> d
skipSeps =
(WS.pack <$>) .
filter (not . (`elem` (pure <$> WS.pathSeparators))) .
(WS.unpack <$>)
stripPardirs | pathIsAbsolute || subpathIsAbsolute = dropWhile (== ws "..")
| otherwise = id
prependSep | subpathIsAbsolute = (WS.pack [WS.pathSeparator] <>)
| otherwise = id
avoidEmpty | not pathIsAbsolute
, drive == mempty || hasTrailingPathSep -- prefer "C:" over "C:."
= emptyToCurDir
| otherwise = id
appendSep p | hasTrailingPathSep, not (pathIsAbsolute && p == mempty)
= WS.addTrailingPathSeparator p
| otherwise = p
pathIsAbsolute = not (WS.isRelative path)
subpathIsAbsolute = any WS.isPathSeparator (take 1 (WS.unpack subpath))
hasTrailingPathSep = WS.hasTrailingPathSeparator subpath

expandDots :: [WindowsPath] -> [WindowsPath]
expandDots = reverse . go []
where
go ys' xs' =
case xs' of
[] -> ys'
x : xs
| x == ws "." -> go ys' xs
| x == ws ".." ->
case ys' of
[] -> go (x : ys') xs
y : ys
| y == ws ".." -> go (x : ys') xs
| otherwise -> go ys xs
| otherwise -> go (x : ys') xs

-- | Remove redundant trailing slashes and pick the right kind of slash.
normaliseTrailingSep :: WindowsPath -> WindowsPath
normaliseTrailingSep path = do
let path' = reverse (WS.unpack path)
let (sep, path'') = span WS.isPathSeparator path'
let addSep = if null sep then id else (WS.pathSeparator :)
WS.pack (reverse (addSep path''))

normalisePathSeps :: WindowsPath -> WindowsPath
normalisePathSeps p = WS.pack (normaliseChar <$> WS.unpack p)
where normaliseChar c = if WS.isPathSeparator c then WS.pathSeparator else c

emptyToCurDir :: WindowsPath -> WindowsPath
emptyToCurDir path
| path == mempty = ws "."
| otherwise = path

ws :: String -> WindowsString
ws = rightOrError . WS.encodeUtf

getFullPathName :: WindowsPath -> IO WindowsPath
getFullPathName path =
fromExtendedLengthPath <$> WS.getFullPathName (toExtendedLengthPath path)

ioeAddLocation :: IOError -> String -> IOError
ioeAddLocation e loc = do
ioeSetLocation e newLoc
where
newLoc = loc <> if null oldLoc then "" else ":" <> oldLoc
oldLoc = ioeGetLocation e

fromExtendedLengthPath :: WindowsPath -> WindowsPath
fromExtendedLengthPath ePath =
case WS.unpack ePath of
c1 : c2 : c3 : c4 : path
| (WS.toChar <$> [c1, c2, c3, c4]) == "\\\\?\\" ->
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a way to leverage short byte string's isPrefixOf here?

case path of
c5 : c6 : c7 : subpath@(c8 : _)
| (WS.toChar <$> [c5, c6, c7, c8]) == "UNC\\" ->
WS.pack (c8 : subpath)
drive : col : subpath
-- if the path is not "regular", then the prefix is necessary
-- to ensure the path is interpreted literally
| WS.toChar col == ':', isDriveChar drive, isPathRegular subpath ->
WS.pack path
_ -> ePath
_ -> ePath
where
isDriveChar drive = isAlpha (WS.toChar drive) && isAscii (WS.toChar drive)
isPathRegular path =
not ('/' `elem` (WS.toChar <$> path) ||
ws "." `elem` WS.splitDirectories (WS.pack path) ||
ws ".." `elem` WS.splitDirectories (WS.pack path))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest to change

           ws "." `elem` WS.splitDirectories (WS.pack path) ||
           ws ".." `elem` WS.splitDirectories (WS.pack path))

to

           any (\x -> x == ws ".." || x == ws ".") WS.splitDirectories (WS.pack path))


#endif
Loading