Skip to content

Commit

Permalink
Handle arbitrary sidecar files
Browse files Browse the repository at this point in the history
Currently, only .xmp sidecars are handled. This change adds support for
arbitrary sidecar extensions and expands the set of sidecars that are
handled by default. The new default set, and the reason each extension
was chosen, is:

 .xmp - general metadata format for all image types
 .json - generated by Google Takeout when exporting from Google Photos
 .yml/.yaml - used by Photoprism

Users can override this default set by passing the new --sidecars
option. The value replaces the default set rather than extending it, so
to handle .xyz alongside .xmp, .json and .yml, users should pass

  --sidecars="xmp,json,yml,xyz"
  • Loading branch information
qlyoung committed Aug 31, 2023
1 parent cdbc8d4 commit 49563a4
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 23 deletions.
30 changes: 30 additions & 0 deletions phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,36 @@ def parse_args(args=sys.argv[1:]):
""",
)

parser.add_argument(
"--sidecars",
default=",".join(Phockup.DEFAULT_SIDECAR_EXTENSIONS),
action="store",
help="""
Override the set of extensions that are considered to be sidecar files.
Sidecars are files that have the same name as an image or video file, but a
different extension. They typically contain additional metadata pertaining to
the image or video file.
File extensions which are considered to be sidecars if they are named the same
as a corresponding image file are:
"""
+ str(Phockup.DEFAULT_SIDECAR_EXTENSIONS)
+ """
So, for example, if image.jpg exists, then image.xmp (or image.jpg.xmp) will be
considered a sidecar file of image.jpg.
When moving the main file, sidecars will inherit the name of the main file and
be moved to the same location rather than being placed in the unknown
directory.
Using this argument you can change this set. For example, to only consider XMP
and JSON:
--sidecars='xmp,json'
""",
)

exclusive_group_debug_silent = parser.add_mutually_exclusive_group()

exclusive_group_debug_silent.add_argument(
Expand Down
27 changes: 27 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,33 @@ The output may look like this, but with more fields:
If the correct date is in `DateTimeOriginal`, you can include the option `--date-field=DateTimeOriginal` to get date information from it.
To set multiple fields to be tried in order until a valid date is found, just join them with spaces in a quoted string like `"CreateDate FileModifyDate"`.

### Handle sidecars
Sidecars are files that have the same name as an image or video file, but a different extension. They typically contain additional metadata pertaining to the image or video file.

By default, a file is considered a sidecar if it has the same name as a corresponding image or video file and one of the following extensions:

* .xmp
* .json
* .yaml
* .yml

So, for example, if `image.jpg` exists, then `image.xmp` (or `image.jpg.xmp`) will be considered a sidecar file of `image.jpg`.

When moving the main file, sidecars will inherit the name of the main file and be moved to the same location rather than being placed in the unknown directory.

You can change which file extensions are eligible to be considered sidecars using the `--sidecars` argument. For example, to only treat `.xmp` and `.json` as sidecars:

```
--sidecars='xmp,json'
```

To disable handling sidecars entirely:

```
--sidecars=''
```


### Dry run
If you want phockup to run without any changes (don't copy/move any files) but just show which changes would be done, enable this feature by using the flag `-y | --dry-run`.

Expand Down
67 changes: 44 additions & 23 deletions src/phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@


class Phockup:
DEFAULT_SIDECAR_EXTENSIONS = ["json", "xmp", "yml", "yaml"]
"""
Sidecar files are files with the same name as the source file, but with a
different (or additional) extension. For example, 'image.jpg' could have
possible sidecars 'image.jpg.xmp', 'image.json', 'image.yml', etc.
This member stores the recognized sidecar extensions.
"""
DEFAULT_DIR_FORMAT = ['%Y', '%m', '%d']
DEFAULT_NO_DATE_DIRECTORY = "unknown"

Expand Down Expand Up @@ -53,6 +61,11 @@ def __init__(self, input_dir, output_dir, **args):
self.dry_run = args.get('dry_run', False)
self.progress = args.get('progress', False)
self.max_depth = args.get('max_depth', -1)
# A missing "sidecars" argument selects the defaults. An explicit empty
# string yields an empty list, which disables sidecar handling (the readme
# documents --sidecars='' for exactly that purpose).
sidecars_arg = args.get("sidecars")
if sidecars_arg is None:
    # Copy so that later changes to the instance list cannot leak into the
    # class-level default.
    self.sidecar_extensions = list(Phockup.DEFAULT_SIDECAR_EXTENSIONS)
else:
    # Tolerate surrounding spaces and leading dots ("xmp, .json") and drop
    # empty entries such as the one produced by a trailing comma.
    self.sidecar_extensions = [
        ext
        for ext in (part.strip().lstrip(".") for part in sidecars_arg.split(","))
        if ext
    ]
# default to concurrency of one to retain existing behavior
self.max_concurrency = args.get("max_concurrency", 1)
if self.max_concurrency > 1:
Expand Down Expand Up @@ -242,9 +255,9 @@ def process_files(self, file_paths_to_process):
def process_file(self, filename):
"""
Process the file using the selected strategy
If file is .xmp skip it so process_xmp method can handle it
If file is a sidecar skip it so process_sidecars method can handle it
"""
if str.endswith(filename, '.xmp'):
if any([filename.endswith(sc_ext) for sc_ext in self.sidecar_extensions]):
return None

progress = f'{filename}'
Expand Down Expand Up @@ -309,7 +322,7 @@ def process_file(self, filename):
self.pbar.write(progress)
logger.info(progress)

self.process_xmp(filename, target_file_name, suffix, output)
self.process_sidecars(filename, target_file_name, suffix, output)
break

suffix += 1
Expand Down Expand Up @@ -344,32 +357,40 @@ def get_file_name_and_path(self, filename):
target_file_path = os.path.sep.join([output, target_file_name])
return output, target_file_name, target_file_path, target_file_type

def process_sidecars(self, original_filename, file_name, suffix, output):
    """
    Given an existing image, handle any sidecar files.

    For every extension in self.sidecar_extensions two candidates are
    checked next to the original file: '<stem>.<ext>' and
    '<stem><original extension>.<ext>' (e.g. 'image.xmp' and
    'image.jpg.xmp' for 'image.jpg'). Each candidate that exists is moved,
    hard-linked or copied into the output directory, with the original stem
    replaced by the stem of file_name plus the duplicate suffix.

    :param original_filename: path of the source file whose sidecars are
        looked up
    :param file_name: target file name of the source file; only its stem
        (name without the last extension) is used
    :param suffix: integer duplicate counter; values greater than 1 are
        appended to the stem as '-<suffix>'
    :param output: directory the sidecars are written to
    """
    car_no_ext, car_extension = os.path.splitext(original_filename)
    new_car_no_ext = os.path.splitext(file_name)[0]
    suffix = f'-{suffix}' if suffix > 1 else ''

    # dict.fromkeys removes duplicate candidates while keeping their order.
    # Duplicates occur when an extension is listed twice or when the source
    # file has no extension; handling one twice would make the second move
    # fail because the file is already gone.
    candidates = dict.fromkeys(
        candidate
        for sc_ext in self.sidecar_extensions
        for candidate in (
            f"{car_no_ext}.{sc_ext}",
            f"{car_no_ext}{car_extension}.{sc_ext}",
        )
    )

    for original in candidates:
        if not os.path.isfile(original):
            continue

        # Every candidate starts with car_no_ext, so swap only that prefix.
        # str.replace would also rewrite later occurrences of the stem,
        # e.g. the 'x' in 'x.xmp'.
        target = f"{new_car_no_ext}{suffix}{original[len(car_no_ext):]}"
        sidecar_path = os.path.sep.join([output, target])
        logger.info(f"{original} => {sidecar_path}")

        if self.dry_run:
            continue

        if self.move:
            shutil.move(original, sidecar_path)
        elif self.link:
            os.link(original, sidecar_path)
        else:
            shutil.copy2(original, sidecar_path)

0 comments on commit 49563a4

Please sign in to comment.