Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2519,12 +2519,32 @@ fn inner_list_branches<'local>(
} else {
JObject::null()
};
let jbranch_identifier = env.new_object(
"java/util/ArrayList",
"(I)V",
&[JValue::Int(contents.identifier.version_mapping.len() as i32)],
)?;
for (version, uuid) in contents.identifier.version_mapping.iter() {
let juuid = env.new_string(uuid)?;
let jmapping = env.new_object(
"org/lance/Branch$BranchVersionMapping",
"(JLjava/lang/String;)V",
&[JValue::Long(*version as i64), JValue::Object(&juuid)],
)?;
env.call_method(
&jbranch_identifier,
"add",
"(Ljava/lang/Object;)Z",
&[JValue::Object(&jmapping)],
)?;
}
let jbranch = env.new_object(
"org/lance/Branch",
"(Ljava/lang/String;Ljava/lang/String;JJI)V",
"(Ljava/lang/String;Ljava/lang/String;Ljava/util/List;JJI)V",
&[
JValue::Object(&jname),
JValue::Object(&jparent),
JValue::Object(&jbranch_identifier),
JValue::Long(contents.parent_version as i64),
JValue::Long(contents.create_at as i64),
JValue::Int(contents.manifest_size as i32),
Expand Down
68 changes: 63 additions & 5 deletions java/src/main/java/org/lance/Branch.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,78 @@
package org.lance;

import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.Objects;
import java.util.Optional;

/**
* Branch metadata aligned with Rust's BranchContents. name is the branch name, parentBranch may be
* null (indicating main), parentVersion is the version on which the branch was created, createAt is
* the unix timestamp (seconds), and manifestSize is the size of the referenced manifest file in
* bytes.
* null (indicating main), branchIdentifier is the lineage chain {@code [(version, uuid), ...]},
* parentVersion is the version on which the branch was created, createAt is the unix timestamp
* (seconds), and manifestSize is the size of the referenced manifest file in bytes.
*/
public class Branch {
/** A single lineage hop in the Rust BranchIdentifier.version_mapping vector. */
public static class BranchVersionMapping {
private final long version;
private final String uuid;

public BranchVersionMapping(long version, String uuid) {
this.version = version;
this.uuid = Objects.requireNonNull(uuid);
}

public long getVersion() {
return version;
}

public String getUuid() {
return uuid;
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this).add("version", version).add("uuid", uuid).toString();
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
BranchVersionMapping that = (BranchVersionMapping) o;
return version == that.version && Objects.equals(uuid, that.uuid);
}

@Override
public int hashCode() {
return Objects.hash(version, uuid);
}
}

private final String name;
private final Optional<String> parentBranch;
private final ImmutableList<BranchVersionMapping> branchIdentifier;
private final long parentVersion;
private final long createAt;
private final int manifestSize;

public Branch(
String name, String parentBranch, long parentVersion, long createAt, int manifestSize) {
this(name, parentBranch, ImmutableList.of(), parentVersion, createAt, manifestSize);
}

public Branch(
String name,
String parentBranch,
List<BranchVersionMapping> branchIdentifier,
long parentVersion,
Comment on lines 79 to +83
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve the previous Branch constructor overload

This change removes the only public Branch(String, String, long, long, int) constructor and replaces it with a new signature that requires List<BranchVersionMapping>, which is a source/binary incompatible API break for downstream Java users who instantiate Branch directly. After upgrading, existing callers will fail to compile (or hit NoSuchMethodError when artifacts are mixed). Please keep the old constructor as an overload and delegate to the new one with a default lineage list to maintain compatibility.

Useful? React with 👍 / 👎.

long createAt,
int manifestSize) {
this.name = name;
this.parentBranch = Optional.ofNullable(parentBranch);
this.branchIdentifier = ImmutableList.copyOf(Objects.requireNonNull(branchIdentifier));
this.parentVersion = parentVersion;
this.createAt = createAt;
this.manifestSize = manifestSize;
Expand All @@ -48,6 +99,10 @@ public Optional<String> getParentBranch() {
return parentBranch;
}

public ImmutableList<BranchVersionMapping> getBranchIdentifier() {
return branchIdentifier;
}

public long getParentVersion() {
return parentVersion;
}
Expand All @@ -65,6 +120,7 @@ public String toString() {
return MoreObjects.toStringHelper(this)
.add("name", name)
.add("parentBranch", parentBranch)
.add("branchIdentifier", branchIdentifier)
.add("parentVersion", parentVersion)
.add("createAt", createAt)
.add("manifestSize", manifestSize)
Expand All @@ -80,11 +136,13 @@ public boolean equals(Object o) {
&& createAt == branch.createAt
&& manifestSize == branch.manifestSize
&& Objects.equals(name, branch.name)
&& Objects.equals(parentBranch, branch.parentBranch);
&& Objects.equals(parentBranch, branch.parentBranch)
&& Objects.equals(branchIdentifier, branch.branchIdentifier);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid unstable lineage IDs in Branch equality

Including branchIdentifier in equals makes equality unstable for legacy branches whose refs JSON lacks identifier: Rust fills that with BranchIdentifier::none(), which generates a fresh UUID during deserialization, so the same logical branch can compare unequal across separate branches().list() calls. This can break client-side map/set lookups and cache keys for older datasets; consider excluding this field from equality (or ensuring legacy identifiers are deterministic before exposing them).

Useful? React with 👍 / 👎.

}

@Override
public int hashCode() {
return Objects.hash(name, parentBranch, parentVersion, createAt, manifestSize);
return Objects.hash(
name, parentBranch, branchIdentifier, parentVersion, createAt, manifestSize);
}
}
9 changes: 9 additions & 0 deletions java/src/test/java/org/lance/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1709,12 +1709,21 @@ void testBranches(@TempDir Path tempDir) {
assertEquals("branch1", branch1Meta.getName());
assertEquals(2, branch1Meta.getParentVersion());
assertFalse(branch1Meta.getParentBranch().isPresent());
assertEquals(1, branch1Meta.getBranchIdentifier().size());
assertEquals(2, branch1Meta.getBranchIdentifier().get(0).getVersion());
assertFalse(branch1Meta.getBranchIdentifier().get(0).getUuid().isEmpty());
assertTrue(branch1Meta.getCreateAt() > 0);
assertTrue(branch1Meta.getManifestSize() > 0);

assertEquals("branch2", branch2Meta.getName());
assertTrue(branch2Meta.getParentBranch().isPresent());
assertEquals("branch1", branch2Meta.getParentBranch().get());
assertEquals(2, branch2Meta.getBranchIdentifier().size());
assertEquals(
branch1Meta.getBranchIdentifier().get(0),
branch2Meta.getBranchIdentifier().get(0));
assertEquals(3, branch2Meta.getBranchIdentifier().get(1).getVersion());
assertFalse(branch2Meta.getBranchIdentifier().get(1).getUuid().isEmpty());
assertEquals(3, branch2Meta.getParentVersion());
assertTrue(branch2Meta.getCreateAt() > 0);
assertTrue(branch2Meta.getManifestSize() > 0);
Expand Down
1 change: 1 addition & 0 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4375,6 +4375,7 @@ class Tag(TypedDict):

class Branch(TypedDict):
parent_branch: Optional[str]
branch_identifier: List[Tuple[int, str]]
parent_version: int
create_at: int
manifest_size: int
Expand Down
12 changes: 10 additions & 2 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5231,11 +5231,16 @@ def test_branches(tmp_path: Path):
branch1.tags.create("main_latest", (None, None))
branch1.tags.create("main_latest2", ("main", None))
branch1.create_branch("branch_from_main", ("main", None))
branches_with_main = branch1.branches.list()
assert branch1.tags.list()["branch1_latest"]["branch"] == "branch1"
assert branch1.tags.list()["main_latest"]["branch"] is None
assert branch1.tags.list()["main_latest2"]["branch"] is None
assert branch1.branches.list()["branch_from_main"]["parent_branch"] is None
assert branch1.branches.list()["branch_from_main"]["parent_version"] == 1
assert branches_with_main["branch_from_main"]["parent_branch"] is None
assert branches_with_main["branch_from_main"]["branch_identifier"][0][0] == 1
assert isinstance(
branches_with_main["branch_from_main"]["branch_identifier"][0][1], str
)
assert branches_with_main["branch_from_main"]["parent_version"] == 1
assert branch1.checkout_version("main_latest").latest_version == 1
assert branch1.checkout_version("main_latest2").latest_version == 1
assert branch1.checkout_version(("branch_from_main", None)).latest_version == 1
Expand All @@ -5261,6 +5266,9 @@ def test_branches(tmp_path: Path):
b1_meta = branches["branch1"]
assert isinstance(b1_meta["parent_version"], int)
assert b1_meta["manifest_size"] > 0
assert b1_meta["branch_identifier"][0][0] == b1_meta["parent_version"]
assert isinstance(b1_meta["branch_identifier"][0][1], str)
assert len(b1_meta["branch_identifier"][0][1]) > 0
assert "create_at" in b1_meta

try:
Expand Down
2 changes: 2 additions & 0 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1818,6 +1818,7 @@ impl Dataset {
for (name, meta) in branches.iter() {
let dict = PyDict::new(py);
dict.set_item("parent_branch", meta.parent_branch.clone())?;
dict.set_item("branch_identifier", meta.identifier.version_mapping.clone())?;
dict.set_item("parent_version", meta.parent_version)?;
dict.set_item("create_at", meta.create_at)?;
dict.set_item("manifest_size", meta.manifest_size)?;
Expand Down Expand Up @@ -1852,6 +1853,7 @@ impl Dataset {
for (name, meta) in ordered.into_iter() {
let dict = PyDict::new(py);
dict.set_item("parent_branch", meta.parent_branch.clone())?;
dict.set_item("branch_identifier", meta.identifier.version_mapping.clone())?;
dict.set_item("parent_version", meta.parent_version)?;
dict.set_item("create_at", meta.create_at)?;
dict.set_item("manifest_size", meta.manifest_size)?;
Expand Down
Loading