def list_files_pathlib(path=Path('.')):
    """Return every regular file found beneath *path*, searching recursively.

    Args:
        path: Directory to start from; defaults to the current directory.

    Returns:
        A list of ``Path`` objects, one per file. Entries appear in the order
        ``iterdir()`` yields them, with the contents of a subdirectory placed
        at the position where that subdirectory was encountered. Entries that
        are neither a file nor a directory are left out.
    """
    def _walk(folder):
        # Depth-first traversal that keeps the parent's iteration order.
        for child in folder.iterdir():
            if child.is_file():
                yield child
            elif child.is_dir():
                yield from _walk(child)

    return list(_walk(path))
def _read_json(filename):
    """Load one JSON file, closing the handle as soon as parsing finishes."""
    # JSON is read as UTF-8 explicitly so non-ASCII values do not depend on
    # the platform's locale encoding.
    with open(filename, encoding='utf-8') as handle:
        return json.load(handle)


def parse_document_data(filename):
    """Return the page entries listed in a ``.content`` file.

    Args:
        filename: Path (``str`` or ``Path``) of the ``.content`` JSON file.

    Returns:
        A list with one dict per element of ``data['cPages']['pages']``:
        ``'page_id'`` is the element's ``'id'`` and ``'filename'`` is
        ``<parent>/<stem>/<id>``, i.e. a path inside the directory that
        shares the content file's stem.

    Raises:
        FileNotFoundError: If the file does not exist.
        KeyError: If ``'cPages'``, ``'pages'`` or a page ``'id'`` is missing.
    """
    filename = Path(filename)  # accept plain strings as well as Path objects
    data = _read_json(filename)
    page_dir = filename.parent / filename.stem  # construct path to subpages
    return [
        {
            'page_id': element['id'],
            'filename': page_dir / element['id'],
        }
        for element in data['cPages']['pages']
    ]


def parse_metadata(filename):
    """Summarise a ``.metadata`` file together with its sibling ``.content``.

    Args:
        filename: Path (``str`` or ``Path``) of the ``.metadata`` JSON file.
            The matching ``<stem>.content`` file is looked up in the same
            directory.

    Returns:
        A dict with ``'last_modified'``, ``'document_name'``,
        ``'document_type'`` and ``'parent_folder'`` copied from the metadata
        keys ``'lastModified'``, ``'visibleName'``, ``'type'`` and
        ``'parent'``, plus ``'page_ids'`` holding the result of
        ``parse_document_data`` for the sibling content file.

    Raises:
        FileNotFoundError: If either file does not exist.
        KeyError: If an expected key is missing from either file.
    """
    filename = Path(filename)  # accept plain strings as well as Path objects
    data = _read_json(filename)
    content_filename = filename.parent / (filename.stem + '.content')
    return {
        'last_modified': data['lastModified'],
        'document_name': data['visibleName'],
        'document_type': data['type'],
        'parent_folder': data['parent'],
        'page_ids': parse_document_data(content_filename),
    }
parse_metadata('fc2e4b57-6ce4-48ae-8452-b6d7873967aa.metadata')\n", + "\n", + "test = parse_metadata(meta_data_files[12])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1deb665", + "metadata": {}, + "outputs": [], + "source": [ + "test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31e993f7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Py2025", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}