-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
74 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
lakefs_client==0.104.0 | ||
pyspark==3.3.2 | ||
pyspark==3.3.2 | ||
pytest==7.4.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pytest | ||
|
||
import lakefs_client | ||
from lakefs_client.client import LakeFSClient | ||
from lakefs_client.models import * | ||
from lakefs_client.model.access_key_credentials import AccessKeyCredentials | ||
from lakefs_client.model.comm_prefs_input import CommPrefsInput | ||
from lakefs_client.model.setup import Setup | ||
from lakefs_client.model.repository_creation import RepositoryCreation | ||
import pyspark | ||
from pyspark.sql import SparkSession | ||
from pyspark.conf import SparkConf | ||
|
||
def test_diff_two_same_branches(spark, lfs_client, lakefs_repo):
    """Verify that a branch freshly created from ``main`` exposes the same table.

    Loads the film-permits CSV into ``lakefs.main.nyc.permits``, commits it on
    ``main``, creates ``dev`` from ``main`` and checks that the table read
    through either branch has the same schema and the same rows.

    Args:
        spark: Spark session used to read the CSV and the tables (presumably a
            fixture wired to the ``lakefs`` catalog -- defined outside this
            file, confirm in conftest).
        lfs_client: lakeFS API client used to commit and to create the branch.
        lakefs_repo: Name of the lakeFS repository the test operates on.
    """
    print("repo name ", lakefs_repo)
    df = (
        spark.read
        .option("inferSchema", "true")
        .option("multiline", "true")
        .csv("./test/data-sets/film_permits.csv")
    )
    df.write.saveAsTable("lakefs.main.nyc.permits")

    # Commit, create a new branch, check that the tables are the same.
    lfs_client.commits.commit(lakefs_repo, "main", CommitCreation(message="Initial data load"))
    lfs_client.branches.create_branch(lakefs_repo, BranchCreation(name="dev", source="main"))
    df_main = spark.read.table("lakefs.main.nyc.permits")
    df_dev = spark.read.table("lakefs.dev.nyc.permits")

    # Check the schema first: it gives a precise failure message and
    # guarantees the column layouts match before the row-level comparison.
    assert df_main.schema == df_dev.schema, "main and dev tables should have the same schema"

    # Spark does not guarantee the order of rows returned by two independent
    # reads, so comparing collect() lists can fail spuriously. exceptAll keeps
    # duplicates, so checking both directions is an order-insensitive multiset
    # equality test.
    only_in_main = df_main.exceptAll(df_dev).count()
    only_in_dev = df_dev.exceptAll(df_main).count()
    assert only_in_main == 0 and only_in_dev == 0, "main and dev tables should be equal"