crawl.py
    #!/usr/bin/env python3
    # encoding: utf-8
    #
    # ** header v3.0
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2020,2021 IndiScale GmbH <info@indiscale.com>
    # Copyright (C) 2020,2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
    # Copyright (C) 2018 Research Group Biomedical Physics
    # Max-Planck-Institute for Dynamics and Self-Organization Göttingen
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    # ** end header
    #
    """
    This script invokes a CaosDB Crawler.
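
    Usage (./data is merely a placeholder for the local directory to crawl):

        python3 crawl.py ./data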
    """
    
    import argparse
    import logging
    import os
    import sys
    from argparse import RawTextHelpFormatter
    
    import caosdb as db
    from caosadvancedtools.cfood import fileguide
    from caosadvancedtools.crawler import FileCrawler
    from caosadvancedtools.guard import UPDATE
    
    from example_cfood import ExampleCFood
    
    
    def get_parser():
    
        parser = argparse.ArgumentParser(description=__doc__,
                                         formatter_class=RawTextHelpFormatter)
        parser.add_argument("path",
                            help="the subtree of files below the given (local) "
                            " path will be considered.")
    
        return parser
    
    
    def local_access(path):
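        # Simply return the path: the files are accessed directly on the
        # local file system and need not be retrieved from the server.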
        return path
    
    
    if __name__ == "__main__":
        parser = get_parser()
        args = parser.parse_args()
    
        # Resolve file paths locally (via local_access above) instead of
        # retrieving the files from the server.
        fileguide.access = local_access
    
        # logging config: print all messages of the caosadvancedtools logger
        # to stdout
        logger = logging.getLogger("caosadvancedtools")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.DEBUG)
    
        # Create the file objects manually since the files do not exist on the
        # server and thus cannot be queried via `FileCrawler.query_files`.
        to_be_crawled = []
    
        for root, dirs, files in os.walk(args.path):
            to_be_crawled.extend(
                [db.File(path=os.path.join(root, f)) for f in files])
    
        # Crawl without caching results of previous runs; an exception in a
        # single cfood is logged instead of aborting the whole run, and no
        # interactive confirmation is requested.
        c = FileCrawler(files=to_be_crawled, use_cache=False,
                        abort_on_exception=False,
                        interactive=False,
                        cfood_types=[
                            ExampleCFood,
                        ],
                        )
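        # The UPDATE security level from caosadvancedtools.guard allows the
        # crawler to update existing entities on the server.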
        c.crawl(security_level=UPDATE, path=args.path)
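    # For reference, a minimal sketch of what example_cfood.py might contain,
    # assuming the usual AbstractFileCFood pattern from caosadvancedtools;
    # the record name "Experiment" and the regex are placeholders, not part
    # of this repository:
    #
    #     import caosdb as db
    #     from caosadvancedtools.cfood import AbstractFileCFood
    #
    #     class ExampleCFood(AbstractFileCFood):
    #         @staticmethod
    #         def get_re():
    #             # regex deciding which crawled files this cfood handles
    #             return r".*/(?P<date>\d{4}-\d{2}-\d{2})/README\.md"
    #
    #         def create_identifiables(self):
    #             # declare the record(s) this file is mapped to
    #             self.exp = db.Record()
    #             self.exp.add_parent(name="Experiment")
    #             self.identifiables.append(self.exp)
    #
    #         def update_identifiables(self):
    #             # fill in properties from the matched path
    #             self.exp.add_property(name="date",
    #                                   value=self.match.group("date"))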