Add, replace, cache and delete artifacts

import pytest
import lamindb as ln

# Log in as the test user, then initialize an instance that uses the S3 test location as storage.
ln.setup.login("testuser1")
ln.setup.init(storage="s3://lamindb-ci/test-add-replace-cache")
✓ logged in with email testuser1@lamin.ai (uid: DzTjkKse)
→ go to: https://lamin.ai/testuser1/test-add-replace-cache
! updating cloud SQLite 's3://lamindb-ci/test-add-replace-cache/23d258b416df505da3a53878e9797d26.lndb' of instance 'testuser1/test-add-replace-cache'
! locked instance (to unlock and push changes to the cloud SQLite file, call: lamin disconnect)

Save with an auto-managed key (key=None)

# Prefix that lamindb puts in front of auto-managed storage keys.
AUTO_KEY_PREFIX = ln.core.storage.paths.AUTO_KEY_PREFIX
# Root path of the storage location configured for this instance.
root = ln.settings.storage.root
# No `key` is passed here, so the storage key is managed automatically.
artifact = ln.Artifact("./test-files/iris.csv", description="iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='rjFuBMC6r1iHYmNv0000', is_latest=True, description='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', _hash_type='md5', visibility=1, _key_is_virtual=True, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:22 UTC)
# With an auto-managed key, the object is stored at <root>/<AUTO_KEY_PREFIX><uid><suffix>.
key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"
assert key_path.exists()
# `cache()` returns the local path of the cached copy, which keeps the artifact's suffix.
cache_csv_path = artifact.cache()
print(cache_csv_path)
assert cache_csv_path.suffix == ".csv"
! run input wasn't tracked, call `ln.track()` and re-run
/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/.lamindb/rjFuBMC6r1iHYmNv0000.csv
# Replace the content with a file that has a different suffix (.csv -> .data), then persist the change.
artifact.replace("./test-files/iris.data")
artifact.save()
! no run & transform got linked, call `ln.track()` & re-run
Artifact(uid='rjFuBMC6r1iHYmNv0000', is_latest=True, description='iris.csv', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', _hash_type='md5', visibility=1, _key_is_virtual=True, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:22 UTC)
# Keep the pre-replace location and recompute the location from the updated suffix.
old_key_path = key_path
new_key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"

The suffix changed:

print(old_key_path)
print(new_key_path)
# The object under the old suffix is gone from storage; only the new one exists.
assert not old_key_path.exists()
assert new_key_path.exists()
s3://lamindb-ci/test-add-replace-cache/.lamindb/rjFuBMC6r1iHYmNv0000.csv
s3://lamindb-ci/test-add-replace-cache/.lamindb/rjFuBMC6r1iHYmNv0000.data
# Caching again yields a local path with the new suffix.
cache_data_path = artifact.cache()
print(cache_data_path)
assert cache_data_path.suffix == ".data"
# The new cache file must not be older than the previously cached .csv copy.
assert cache_data_path.stat().st_mtime >= cache_csv_path.stat().st_mtime
! run input wasn't tracked, call `ln.track()` and re-run
/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/.lamindb/rjFuBMC6r1iHYmNv0000.data
artifact.delete(permanent=True)

Save with manually passed real key

# Turn off virtual keys so that the passed `key` is the actual path below the storage root.
ln.settings.creation._artifact_use_virtual_keys = False
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='lqj1tOiOr9ussv7a0000', is_latest=True, key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', _hash_type='md5', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:24 UTC)
# With a real key, the object lives directly at <root>/<key>.
key_path = root / "iris.csv"
assert key_path.exists()
# Replace with a file that has a different name but the same suffix.
artifact.replace("./test-files/new_iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='lqj1tOiOr9ussv7a0000', is_latest=True, key='iris.csv', suffix='.csv', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', _hash_type='md5', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:24 UTC)

Check paths: the key is still `iris.csv` because the suffix didn’t change.

old_key_path = key_path
# Where the object would be if the key had followed the new file's name.
new_key_path = root / "new_iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/new_iris.csv')
# The key stays `iris.csv`: nothing was written under the new file's name.
assert old_key_path.exists()
assert not new_key_path.exists()
# Now replace with a file whose suffix differs.
artifact.replace("./test-files/iris.data")
! no run & transform got linked, call `ln.track()` & re-run
! replacing the file will replace key 'iris.csv' with 'iris.data' and delete 'iris.csv' upon `save()`
artifact.save()
Artifact(uid='lqj1tOiOr9ussv7a0000', is_latest=True, key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', _hash_type='md5', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:24 UTC)
# The key followed the suffix change: `iris.csv` became `iris.data`.
new_key_path = root / "iris.data"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/iris.data')
# The object under the old key is gone; the one under the new key exists.
assert not old_key_path.exists()
assert new_key_path.exists()
# Delete permanently and also remove the object from storage.
artifact.delete(permanent=True, storage=True)

Save from memory

import pandas as pd
# Load the test file into a DataFrame and create the artifact from the in-memory object.
iris = pd.read_csv("./test-files/iris.csv")
artifact = ln.Artifact.from_df(iris, description="iris_store", key="iris.parquet")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='KDJpNWGrkVxxtbos0000', is_latest=True, description='iris_store', key='iris.parquet', suffix='.parquet', type='dataset', size=4510, hash='UKx0yKUjcJ_ZQT98GYSnbA', _hash_type='md5', _accessor='DataFrame', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:27 UTC)
key_path = root / "iris.parquet"
assert key_path.exists()
# Replace with another in-memory DataFrame (all rows but the last).
artifact.replace(data=iris[:-1])
! no run & transform got linked, call `ln.track()` & re-run
# Replacing the DataFrame with another DataFrame leaves the key untouched.
assert artifact.key == "iris.parquet"
artifact.save()
Artifact(uid='KDJpNWGrkVxxtbos0000', is_latest=True, description='iris_store', key='iris.parquet', suffix='.parquet', type='dataset', size=4490, hash='_pTyWwZcwx9SqWqz9kxJbQ', _hash_type='md5', _accessor='DataFrame', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:27 UTC)
assert key_path.exists()
# Replace the DataFrame-backed content with a file on disk that has a different suffix.
artifact.replace("./test-files/new_iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
! replacing the file will replace key 'iris.parquet' with 'iris.csv' and delete 'iris.parquet' upon `save()`
artifact.save()
Artifact(uid='KDJpNWGrkVxxtbos0000', is_latest=True, description='iris_store', key='iris.csv', suffix='.csv', type='dataset', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', _hash_type='md5', _accessor='DataFrame', visibility=1, _key_is_virtual=False, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:27 UTC)
old_key_path = key_path
# The key took over the new suffix: `iris.parquet` became `iris.csv`.
new_key_path = root / "iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/iris.parquet')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-cache/iris.csv')
assert not old_key_path.exists()
assert new_key_path.exists()
# Keep the storage path: the next section uses it after the artifact has been deleted.
path_in_storage = artifact.path
# `storage=False` deletes the artifact but leaves the object in storage.
artifact.delete(permanent=True, storage=False)
→ a file/folder remains here: s3://lamindb-ci/test-add-replace-cache/iris.csv

Save with manually passed virtual key

# Turn virtual keys back on: the storage path is then derived from the uid rather than from `key`.
ln.settings.creation._artifact_use_virtual_keys = True
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='q0BpG0f0Xj16hhao0000', is_latest=True, key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', _hash_type='md5', visibility=1, _key_is_virtual=True, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:29 UTC)
# Replacing with the object left behind in storage by the previous section must raise ValueError.
with pytest.raises(ValueError):
    artifact.replace(path_in_storage)
! no run & transform got linked, call `ln.track()` & re-run
# Replacing with content that has the same hash returns the existing artifact.
assert artifact == artifact.replace("./test-files/iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
→ returning existing artifact with same hash: Artifact(uid='q0BpG0f0Xj16hhao0000', is_latest=True, key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', _hash_type='md5', visibility=1, _key_is_virtual=True, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:29 UTC)
# With a virtual key, the object in storage is named after the uid, not the key.
fpath = artifact.path
assert fpath.suffix == ".csv"
assert fpath.stem == artifact.uid
# Replace with a file whose suffix differs.
artifact.replace("./test-files/iris.data")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='q0BpG0f0Xj16hhao0000', is_latest=True, key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', _hash_type='md5', visibility=1, _key_is_virtual=True, storage_id=1, created_by_id=1, created_at=2024-10-25 17:08:29 UTC)
# The virtual key follows the new suffix.
assert artifact.key == "iris.data"
# The object stored under the old suffix has been removed.
assert not fpath.exists()
# The new object is still named after the uid, now with the new suffix.
fpath = artifact.path
assert fpath.suffix == ".data"
assert fpath.stem == artifact.uid
# Clean up: the artifact with its object, the leftover object from the previous section, and the test instance.
artifact.delete(permanent=True, storage=True)
path_in_storage.unlink()
ln.setup.delete("test-add-replace-cache", force=True)
→ deleted storage record on hub 137768267d4556a491fc02b58f1b630b
→ deleted instance record on hub 23d258b416df505da3a53878e9797d26