diff --git a/src/programs/crawl.py b/src/programs/crawl.py new file mode 100755 index 0000000000000000000000000000000000000000..a21d44349e54852bdb5f319c92f85a6124adf951 --- /dev/null +++ b/src/programs/crawl.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#
# ** end header
#
"""Run an example crawl over this script file.

The script wraps its own source file in a ``caosdb.File`` and hands it to a
``Crawler`` that is equipped with a single ``ExampleCFood``.
"""

import argparse
from argparse import RawTextHelpFormatter

import caosdb as db
from caosadvancedtools.crawler import Crawler

from example_cfood import ExampleCFood


def get_parser():
    """Return the command-line parser of this script.

    The module docstring is used as the description and is printed verbatim
    (``RawTextHelpFormatter``). No options are defined yet.
    """
    return argparse.ArgumentParser(description=__doc__,
                                   formatter_class=RawTextHelpFormatter)


def main():
    """Parse the command line and crawl this script file with ExampleCFood."""
    # No options exist yet, so the parsed namespace is not needed; parsing
    # still provides -h/--help and rejects unexpected arguments.
    get_parser().parse_args()

    crawled_file = db.File(file=__file__, path="filepath")
    cfood = ExampleCFood(pattern=r"(.*)craw(.*)")
    crawler = Crawler(food=[cfood])
    crawler.crawl([crawled_file])


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# src/programs/example_cfood.py (new file, mode 100644)
# ---------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#

from datetime import datetime

import caosdb as db
from caosadvancedtools.cfood import AbstractCFood


class ExampleCFood(AbstractCFood):
    """Example CFood that builds one "Experiment" record per matched file."""

    def create_identifiables(self, crawled_file, match):
        """Build the records that identify the crawled file.

        Parameters
        ----------
        crawled_file
            The file being crawled; not used by this example.
        match
            Regular-expression match object produced for the crawled file.

        Returns
        -------
        dict
            Maps the key ``"exp"`` to a new record with parent "Experiment"
            and a "species" property.
        """
        experiment = db.Record()
        experiment.add_parent(name="Experiment")
        # The original passed the bound method ``match.group`` itself as the
        # value. NOTE(review): the first capture group is assumed to hold the
        # species -- confirm against the pattern this CFood is created with.
        experiment.add_property(name="species", value=match.group(1))

        return {"exp": experiment}

    def update_identifiables(self, entities, crawled_file, match):
        """Stamp the experiment with the current date and update all entities.

        Parameters
        ----------
        entities : dict
            Mapping as returned by ``create_identifiables``; must contain the
            key ``"exp"``.
        crawled_file
            The file being crawled; not used by this example.
        match
            Regular-expression match object; not used by this example.
        """
        entities["exp"].add_property(name="date",
                                     value=datetime.today().isoformat())

        # ``values`` has to be called, and is materialised as a list before it
        # is handed to the container. The calls are not chained, so this does
        # not rely on ``extend`` returning the container.
        container = db.Container()
        container.extend(list(entities.values()))
        container.update()