Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ability to show unified diffs for changed files #562

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions copier/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class CopierApp(cli.Application):
skip: Set [skip_if_exists][] option.
prereleases: Set [use_prereleases][] option.
quiet: Set [quiet][] option.
diff: Set [diff][] option.
"""

DESCRIPTION = "Create a new project from a template."
Expand Down Expand Up @@ -175,6 +176,10 @@ class CopierApp(cli.Application):
["-g", "--prereleases"],
help="Use prereleases to compare template VCS tags.",
)
diff: cli.Flag = cli.Flag(
["-D", "--diff"],
help="Show diff when updating files",
)

@cli.switch(
["-d", "--data"],
Expand Down Expand Up @@ -216,6 +221,7 @@ def _worker(self, src_path: OptStr = None, dst_path: str = ".", **kwargs) -> Wor
src_path=src_path,
vcs_ref=self.vcs_ref,
use_prereleases=self.prereleases,
diff=self.diff,
**kwargs,
)

Expand Down
26 changes: 23 additions & 3 deletions copier/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import Callable, Iterable, List, Mapping, Optional, Sequence
from unicodedata import normalize

import colorama
import pathspec
from jinja2.loaders import FileSystemLoader
from jinja2.sandbox import SandboxedEnvironment
Expand All @@ -26,7 +27,7 @@
from .errors import CopierAnswersInterrupt, ExtensionNotFoundError, UserMessageError
from .subproject import Subproject
from .template import Template
from .tools import Style, TemporaryDirectory, printf
from .tools import Style, TemporaryDirectory, color_print, is_binary, print_diff, printf
from .types import (
AnyByStrDict,
JSONSerializable,
Expand Down Expand Up @@ -128,6 +129,12 @@ class Worker:
When `True`, disable all output.

See [quiet][].

diff:
When `True` show a unified diff for changed files.

See [diff][].

"""

src_path: Optional[str] = None
Expand All @@ -144,6 +151,7 @@ class Worker:
overwrite: bool = False
pretend: bool = False
quiet: bool = False
diff: bool = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a nice addition. I don't think it's needed to add a new flag for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just default to always diff when we can?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can't imagine a use case where the no diff is more useful if we're on interactive mode.


def _answers_to_remember(self) -> Mapping:
"""Get only answers that will be remembered in the copier answers file."""
Expand Down Expand Up @@ -216,7 +224,12 @@ def _path_matcher(self, patterns: Iterable[str]) -> Callable[[Path], bool]:
spec = pathspec.PathSpec.from_lines("gitwildmatch", normalized_patterns)
return spec.match_file

def _solve_render_conflict(self, dst_relpath: Path):
def _solve_render_conflict(
self,
dst_relpath: Path,
original_content: Optional[bytes] = None,
new_content: Optional[bytes] = None,
):
"""Properly solve render conflicts.

It can ask the user if running in interactive mode.
Expand Down Expand Up @@ -247,6 +260,11 @@ def _solve_render_conflict(self, dst_relpath: Path):
file_=sys.stderr,
)
return True
if self.diff and original_content and new_content:
if is_binary(original_content) or is_binary(new_content):
color_print(colorama.Fore.YELLOW, " diff not availalbe for binaries")
else:
print_diff(dst_relpath, original_content, new_content)
return bool(ask(f" Overwrite {dst_relpath}?", default=True))

def _render_allowed(
Expand Down Expand Up @@ -310,7 +328,9 @@ def _render_allowed(
file_=sys.stderr,
)
return True
return self._solve_render_conflict(dst_relpath)
return self._solve_render_conflict(
dst_relpath, previous_content, expected_contents
)

@cached_property
def answers(self) -> AnswersMap:
Expand Down
56 changes: 55 additions & 1 deletion copier/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import tempfile
import warnings
from contextlib import suppress
from difflib import unified_diff
from pathlib import Path
from types import TracebackType
from typing import Any, Callable, Optional, TextIO, Tuple, Union
Expand All @@ -16,7 +17,7 @@
from packaging.version import Version
from pydantic import StrictBool

from .types import IntSeq
from .types import IntSeq, StrSeq

try:
from importlib.metadata import version
Expand Down Expand Up @@ -172,3 +173,56 @@ def cleanup(self):
@staticmethod
def _robust_cleanup(name):
shutil.rmtree(name, ignore_errors=False, onerror=handle_remove_readonly)


def get_lines(content: bytes) -> StrSeq:
# Assume UTF-8 encoding for text files. Since Jinja2 is already doing this
# for templates, there probably isn't much harm in also doing it for all
# text files. The worst case should be a failure to show the diffs. An
# alternative is to add chardet support.
return content.decode("utf-8").split(os.linesep)


def is_binary(content: bytes) -> bool:
"""Return a boolean indicating if the content is a binary file."""
return b"\x00" in content or b"\xff" in content


def color_print(color: int, msg: str):
print(f"{color}{msg}{colorama.Fore.RESET}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def print_diff(dst_relpath: Path, original_content: bytes, new_content: bytes):
try:
original_lines = get_lines(original_content)
except Exception:
color_print(
colorama.Fore.YELLOW, "diff unavailable: unable to determine encoding"
)
return
try:
new_lines = get_lines(new_content)
except Exception:
color_print(
colorama.Fore.YELLOW, "diff unavailable: unable to determine encoding"
)
return
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know some flake8 plugins would say: "only one line in a try block", but this could probably be factorized as:

Suggested change
try:
original_lines = get_lines(original_content)
except Exception:
color_print(
colorama.Fore.YELLOW, "diff unavailable: unable to determine encoding"
)
return
try:
new_lines = get_lines(new_content)
except Exception:
color_print(
colorama.Fore.YELLOW, "diff unavailable: unable to determine encoding"
)
return
try:
original_lines = get_lines(original_content)
new_lines = get_lines(new_content)
except Exception:
color_print(
colorama.Fore.YELLOW, "diff unavailable: unable to determine encoding"
)
return

Also, is there a reason you catch Exception rather than the more specific UnicodeDecodeError?

Copy link
Contributor Author

@dstanek dstanek Feb 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could combine the try blocks.

The reason I'm currently catching Exception is mostly because this isn't ready to merge. My first version used chardet and there were some exceptions to catch, then I decided to do a "detect on the cheap" algorithm to avoid an extra dependency and finally settled on hard coding utf-8 once I realized that Jinja is already doing that. If the final version is just the single decode then I could switch to catch that one exception.

Since the previous implementations had a few exceptions to catch and there is no particular handing for any of them I just went with Exception.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks!


indent = " " * 6
for line in unified_diff(
original_lines,
new_lines,
f"old/{dst_relpath}",
f"new/{dst_relpath}",
lineterm="",
):
if line.startswith("---") or line.startswith("+++"):
color_print(colorama.Fore.YELLOW, f"{indent}{line}")
elif line.startswith("@@"):
color_print(colorama.Fore.MAGENTA, f"{indent}{line}")
elif line.startswith("-"):
color_print(colorama.Fore.RED, f"{indent}{line}")
elif line.startswith("+"):
color_print(colorama.Fore.GREEN, f"{indent}{line}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I was going down this path first, but it seemed very difficult to get the result I wanted. I went ahead and finished that work a put together a gist showing the differences. orig.diff is the code from this PR and new.diff uses pygments and prompt_toolkit.

One reason the new code is much larger is that I wanted to have the two-line file header yellow like it is with git. This meant that I had to extend the lexer. In order to add the extra indentation, I had to also extend the PygmentsTokens class to inject whitespace. There may be better ways to accomplish these tasks, but I'm never had to dip into the bowels of these libraries before.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.

Well, I'll have to do local testing for both implementations, because code looks complex in both cases, so if we're getting a complexity, I'll get the one that looks/works better 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll update this implementation here since that's what I'm actively using and I think I can make it cleaner and more concise. If you like the other way better I can switch to that later.

else:
print(f"{indent}{line}")
2 changes: 1 addition & 1 deletion tests/test_templated_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def test_templated_prompt_builtins(tmp_path_factory):
)
Worker(str(src), dst, defaults=True, overwrite=True).run_copy()
that_now = datetime.fromisoformat((dst / "now").read_text())
assert that_now <= datetime.now()
assert that_now <= datetime.utcnow()
assert len((dst / "make_secret").read_text()) == 128


Expand Down