Skip to content

Commit

Permalink
dvcfilesystem.get: prefer download from repo.fs before dvc.fs
Browse files Browse the repository at this point in the history
When the filesystem is opened from a local repository and a file exists both
as a DVC-tracked file and in the local workspace, `DVCFileSystem.get` should
prefer the local file, just as the `get_file` method does.
  • Loading branch information
skshetry committed Aug 29, 2024
1 parent 4f3fb15 commit 927b419
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions dvc/fs/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,25 +558,29 @@ def _get( # noqa: C901
for d in _dirs:
os.mkdir(d)

repo_fs = self.repo.fs

def _get_file(arg):
dvc_fs, src, dest, info = arg
dvc_info = info.get("dvc_info")
if dvc_info and dvc_fs:
fs_info = info.get("fs_info")
if dvc_fs and dvc_info and not fs_info:
dvc_path = dvc_info["name"]
dvc_fs.get_file(
dvc_path, dest, callback=callback, info=dvc_info, **kwargs
)
else:
self.get_file(src, dest, callback=callback, **kwargs)
fs_path = fs_info["name"]
repo_fs.get_file(fs_path, dest, callback=callback, info=info, **kwargs)
return src, dest, info

with ThreadPoolExecutor(max_workers=batch_size) as executor:
return list(executor.imap_unordered(_get_file, _files))

def get_file(self, rpath, lpath, **kwargs):
dvc_info = kwargs.pop("info", {}).pop("dvc_info", None)
key = self._get_key_from_relative(rpath)
fs_path = self._from_key(key)

dirpath = os.path.dirname(lpath)
if dirpath:
# makedirs raises error if the string is empty
Expand All @@ -590,7 +594,7 @@ def get_file(self, rpath, lpath, **kwargs):
raise

dvc_path = _get_dvc_path(dvc_fs, subkey)
return dvc_fs.get_file(dvc_path, lpath, **kwargs)
return dvc_fs.get_file(dvc_path, lpath, info=dvc_info, **kwargs)

def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
if maxdepth is not None:
Expand Down

0 comments on commit 927b419

Please sign in to comment.