{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# h5glance\n", "\n", "* `h5ls` shows too little, `h5ls -rv` too much\n", "* `hdfview` needs X forwarding, lots of clicking" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Terminal view" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[94msample.h5\u001b[0m\r\n", "├\u001b[94mgroup\u001b[0m\r\n", "│ └\u001b[94msubgroup\u001b[0m\r\n", "│ ├\u001b[1m0\u001b[0m\t[float64: 10 × 5 × 0] (1 attributes)\r\n", "│ ├\u001b[1m1\u001b[0m\t[float64: 10 × 5 × 1] (1 attributes)\r\n", "│ ├\u001b[1m2\u001b[0m\t[float64: 10 × 5 × 2] (1 attributes)\r\n", "│ ├\u001b[1m3\u001b[0m\t[float64: 10 × 5 × 3] (1 attributes)\r\n", "│ ├\u001b[1m4\u001b[0m\t[float64: 10 × 5 × 4] (1 attributes)\r\n", "│ ├\u001b[1m5\u001b[0m\t[float64: 10 × 5 × 5] (1 attributes)\r\n", "│ ├\u001b[1m6\u001b[0m\t[float64: 10 × 5 × 6] (1 attributes)\r\n", "│ ├\u001b[1m7\u001b[0m\t[float64: 10 × 5 × 7] (1 attributes)\r\n", "│ ├\u001b[1m8\u001b[0m\t[float64: 10 × 5 × 8] (1 attributes)\r\n", "│ └\u001b[1m9\u001b[0m\t[float64: 10 × 5 × 9] (1 attributes)\r\n", "└\u001b[95mlatest\u001b[0m\t-> group/subgroup/9\r\n", "\r\n" ] } ], "source": [ "!h5glance sample.h5" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sample.h5/group/subgroup/6\r\n", " dtype: float64\r\n", " shape: 10 × 5 × 6\r\n", " maxshape: 10 × 5 × 6\r\n", " layout: Contiguous\r\n", "\r\n", "sample data:\r\n", "[[0. 0. 0. 0. 0. 0.]\r\n", " [0. 0. 0. 0. 0. 0.]\r\n", " [0. 0. 0. 0. 0. 0.]\r\n", " [0. 0. 0. 0. 0. 0.]\r\n", " [0. 0. 0. 0. 0. 
0.]]\r\n", "\r\n", "1 attributes:\r\n", "* square: 36\r\n", "\r\n" ] } ], "source": [ "!h5glance sample.h5 group/subgroup/6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Tab completion** for bash & zsh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Notebook" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "import h5py\n", "from h5glance import H5Glance" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f = h5py.File('sample.h5', 'r')\n", "f" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
        • 0 [📋]: 10 × 5 × 0 entries, dtype: <f8
        • 1 [📋]: 10 × 5 × 1 entries, dtype: <f8
        • 2 [📋]: 10 × 5 × 2 entries, dtype: <f8
        • 3 [📋]: 10 × 5 × 3 entries, dtype: <f8
        • 4 [📋]: 10 × 5 × 4 entries, dtype: <f8
        • 5 [📋]: 10 × 5 × 5 entries, dtype: <f8
        • 6 [📋]: 10 × 5 × 6 entries, dtype: <f8
        • 7 [📋]: 10 × 5 × 7 entries, dtype: <f8
        • 8 [📋]: 10 × 5 × 8 entries, dtype: <f8
        • 9 [📋]: 10 × 5 × 9 entries, dtype: <f8
    • latest [📋]: 10 × 5 × 9 entries, dtype: <f8
" ], "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "H5Glance(f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "📋: Copy path to clipboard" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f['/group/subgroup/1']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# h5py" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## File & Group\n", "\n", "Like nested dictionaries:" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f['group']" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "group -- \n", "latest -- \n" ] } ], "source": [ "for key, value in f.items():\n", " print(key, '--', value)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f['group']['subgroup']" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f['group/subgroup']" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f['group/subgroup/8']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Datasets\n", "\n", "Like numpy arrays, slice to read data:" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "array([[0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.]])" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = f['group/subgroup/8']\n", "ds[0, 0:3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create small datasets simply from numpy arrays:" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3, 4, 5],\n", " [ 6, 7, 8, 9, 10, 11],\n", " [12, 13, 14, 15, 16, 17],\n", " [18, 19, 20, 21, 22, 23],\n", " [24, 25, 26, 27, 28, 29]])" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "arr = np.arange(30).reshape(5, 6)\n", "arr" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f2 = h5py.File('demo.h5', 'w')\n", "f2['data'] = arr\n", "f2['data']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create big datasets without data, then fill them piecewise:" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 2, 3, 4, 5],\n", " [0, 1, 2, 3, 4, 5],\n", " [0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0]])" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "big_dataset = f2.create_dataset('big_data', shape=(1_000_000, 5, 6), dtype=np.int64)\n", "\n", "for a in range(10):\n", " big_dataset[a] = arr\n", "\n", "big_dataset[8:12, 0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Datasets can grow:" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(110, 5, 6)" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"growing_dataset = f2.create_dataset('growing_data', shape=(0, 5, 6), maxshape=(None, 5, 6), dtype=np.int64)\n", "\n", "# Grow in steps of 10 rows so resize() is not called on every single write\n", "for a in range(103):\n", "    dim0 = growing_dataset.shape[0]\n", "    if a >= dim0:\n", "        growing_dataset.resize((dim0 + 10, 5, 6))\n", "\n", "    growing_dataset[a] = arr\n", "\n", "growing_dataset.shape" ] },
 { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "f2.close()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Low-level API" ] },
 { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.id" ] },
 { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17832" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.id.get_offset()" ] },
 { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dcpl = ds.id.get_create_plist()\n", "dcpl.get_nfilters()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Virtual datasets" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "![](vds_concept.svg)" ] },
 { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# Create source files (1.h5 to 4.h5)\n", "# A dedicated name is used so the notebook-level `f` (sample.h5) is not rebound\n", "for n in range(1, 5):\n", "    with h5py.File('{}.h5'.format(n), 'w') as source_file:\n", "        source_file['data'] = np.arange(100) + n" ] },
 { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Virtual dataset:\n", "[[ 1 2 3 4 5 6 7 8 9 10]\n", " [ 2 3 4 5 6 7 8 9 10 11]\n", " [ 3 4 5 6 7 8 9 10 11 12]\n", " [ 4 5 6 7 8 9 10 11 12 13]]\n" ] } ], "source": [ "# Assemble virtual dataset\n", "layout = h5py.VirtualLayout(shape=(4, 100), dtype='i4')\n", "\n", "for n in range(1, 5):\n", "    filename = \"{}.h5\".format(n)\n", "    vsource = h5py.VirtualSource(filename, 'data', shape=(100,))\n", "\n", "    # Row n - 1 of the virtual dataset maps onto the whole of file n\n", "    layout[n - 1] = vsource\n", "\n", "# Add virtual dataset to output file\n", "# (named vds_file so the notebook-level `f` is left untouched)\n", "with h5py.File(\"VDS.h5\", 'w', libver='latest') as vds_file:\n", "    vds_file.create_virtual_dataset('data', layout, fillvalue=-5)\n", "    print(\"Virtual dataset:\")\n", "    print(vds_file['data'][:, :10])" ] },
 { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[94mVDS.h5\u001b[0m\r\n", "└\u001b[1mdata\u001b[0m\t[int32: 4 × 100] virtual\r\n", "\r\n" ] } ], "source": [ "!h5glance VDS.h5" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }