# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
__docformat__ = 'restructuredtext'

from os.path import join as opj
from glob import glob
import itertools
from .base import Interface
from ..utils import rmtree
from ..support.param import Parameter
from ..consts import (
ARCHIVES_TEMP_DIR,
ANNEX_TEMP_DIR,
ANNEX_TRANSFER_DIR,
SEARCH_INDEX_DOTGITDIR,
)
from datalad.support.gitrepo import GitRepo
from datalad.support.constraints import EnsureNone
from datalad.distribution.dataset import (
EnsureDataset,
require_dataset,
datasetmethod,
)
from datalad.interface.common_opts import (
recursion_flag,
recursion_limit,
)
from datalad.interface.results import get_status_dict
from datalad.interface.utils import eval_results
from datalad.interface.base import build_doc
from logging import getLogger
lgr = getLogger('datalad.api.clean')

# needed API commands
import datalad.distribution.subdatasets


@build_doc
class Clean(Interface):
"""Clean up after DataLad (possible temporary files etc.)
Removes extracted temporary archives, etc.
Examples:
$ datalad clean
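
      Clean only the cached extracted archives:

      $ datalad clean --what cached-archives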
"""
_params_ = dict(
dataset=Parameter(
args=("-d", "--dataset"),
doc="""specify the dataset to perform the clean operation on. If
no dataset is given, an attempt is made to identify the dataset
in current working directory""",
constraints=EnsureDataset() | EnsureNone()),
# TODO: --info -- which performs dry run just summarizing what is to be cleaned up
# TODO: Python only???
what=Parameter(
args=("--what",),
dest='what',
choices=('cached-archives', 'annex-tmp', 'annex-transfer', 'search-index'),
nargs="*",
doc="""What to clean. If none specified -- all known targets are
cleaned"""),
recursive=recursion_flag,
recursion_limit=recursion_limit,
    )

    @staticmethod
@datasetmethod(name='clean')
@eval_results
def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
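        # resolve the dataset to operate on; raises if none can be determined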
ds = require_dataset(dataset, purpose='clean-up')
res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
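        # process this dataset and, if requested, all installed subdatasets
        # within the given recursion limit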
for wds in itertools.chain([ds], ds.subdatasets(
fulfilled=True,
recursive=recursive,
recursion_limit=recursion_limit,
return_type='generator',
result_renderer='disabled',
result_xfm='datasets') if recursive else []):
d = wds.path
gitdir = GitRepo.get_git_dir(d)
DIRS_PLURAL = ("directory", "directories")
FILES_PLURAL = ("file", "files")
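            # table of cleanup targets: (directory relative to the dataset
            # root, corresponding --what label, human-readable description,
            # (singular, plural) nouns for the removal message)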
for dirpath, flag, msg, sing_pl in [
(ARCHIVES_TEMP_DIR, "cached-archives",
"temporary archive", DIRS_PLURAL),
(ANNEX_TEMP_DIR, "annex-tmp",
"temporary annex", FILES_PLURAL),
(ANNEX_TRANSFER_DIR, "annex-transfer",
"annex temporary transfer", DIRS_PLURAL),
(opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
"metadata search index", FILES_PLURAL),
]:
topdir = opj(d, dirpath)
lgr.debug("Considering to clean %s:%s", d, dirpath)
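                # skip targets that were not requested via `what`, but still
                # report them as 'notneeded'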
                if what is not None and flag not in what:
yield get_status_dict(
path=topdir, status='notneeded', type='directory', **res_kwargs)
continue
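                # take stock of the target directory content, so the removal
                # can be reported in detail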
paths = glob(opj(topdir, '*'))
if not paths:
yield get_status_dict(
path=topdir, status='notneeded', type='directory', **res_kwargs)
continue
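                # compose the result message as a lazy %-format tuple,
                # pluralized to match the number of removed items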
pl = len(paths) > 1
message = ("Removed %d %s %s: %s",
len(paths), msg, sing_pl[int(pl)],
", ".join(sorted([x[len(topdir) + 1:] for x in paths])))
rmtree(topdir)
yield get_status_dict(
                    path=topdir, status='ok', type='directory', message=message,
**res_kwargs)
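
# Usage sketch (illustration only, not executed as part of this module): the
# decorators above bind this command as `clean` in datalad.api; the path used
# below is hypothetical.
#
#   from datalad.api import clean
#   for res in clean(dataset='/path/to/dataset', what=['annex-tmp'],
#                    recursive=True, return_type='generator'):
#       print(res['status'], res['path'])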