-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
480 lines (430 loc) · 25.4 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
<!DOCTYPE html>
<html lang="en">
<!-- Legacy Google AJAX Libraries loader, now served over HTTPS so the request
     is not blocked as mixed content when this page is hosted on HTTPS
     (e.g. GitHub Pages).
     NOTE(review): jQuery 3.x is also loaded below from cdnjs, so this 1.3.2
     load is almost certainly redundant; confirm nothing depends on it before
     removing the loader entirely. -->
<script src="https://www.google.com/jsapi"></script>
<script>
google.load("jquery", "1.3.2");
</script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-5KQQ6EHLPE"></script>
<script>
// Google Analytics 4 (gtag.js) bootstrap: commands are pushed onto the
// dataLayer queue and consumed by the async gtag.js script loaded above.
window.dataLayer = window.dataLayer || [];
function gtag() { dataLayer.push(arguments); }
gtag('js', new Date()); // record the page-load timestamp
gtag('config', 'G-5KQQ6EHLPE'); // initialize this GA4 measurement ID
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.0.0/jquery.min.js"></script>
<!-- jQuery Modal -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-modal/0.9.1/jquery.modal.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/jquery-modal/0.9.1/jquery.modal.min.css" />
<div class="topnav" id="myTopnav">
<a href="http://infosec.pusan.ac.kr/"><img width="100%" src="assets/infosec_logo.png" alt="Information Security Lab, Pusan National University"></a>
<a href="https://www.smartm2m.co.kr/"><img width="100%" src="assets/smartm2m_blk_logo.png" alt="SmartM2M"></a>
<a href="https://add.re.kr/"><img width="100%" src="assets/add_logo.png" alt="Agency for Defense Development"></a>
</div>
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic'
rel='stylesheet' type='text/css'>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</title>
<meta property="og:description"
content="DTA: Physical Camouflage Attacks using Differentiable Transformation Network" />
<link href="https://fonts.googleapis.com/css2?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="style.css">
</head>
<body>
<div class="container">
<div id="avs" class="modal">
<p>Available soon...</p>
</div>
<div class="paper-title">
<h1>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</h1>
<h2>CVPR 2022</h2>
</div>
<div id="authors">
<div class="author-row">
<div class="col-3 text-center"><a href="https://www.linkedin.com/in/naufal-suryanto/">Naufal
Suryanto</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=DXb797cAAAAJ">Yongsu
Kim</a><sup>1,2</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=GeQi_D4AAAAJ">
Hyoeun Kang</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.id/citations?user=S8lwCEUAAAAJ">
Harashta Tatimma Larasati</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.com/citations?user=dFZRNOEAAAAJ">
Youngyeo Yun</a><sup>1</sup>
</div>
<div class="col-3 text-center"><a href="https://scholar.google.com/citations?user=UptzPYsAAAAJ">
Thi-Thu-Huong Le</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=mDxJj2AAAAAJ">Hunmin
Yang</a><sup>3</sup></div>
<div class="col-3 text-center"><a href="https://ieeexplore.ieee.org/author/37088566336">Se-Yoon
Oh</a><sup>3</sup></div>
<div class="col-3 text-center"><a href="https://ieeexplore.ieee.org/author/37082911200">Howon
Kim</a><sup>1,2</sup></div>
</div>
<div class="affil-row">
<div class="col-3 text-center"><sup>1</sup>Pusan National University</div>
<div class="col-3 text-center"><sup>2</sup>SmartM2M</div>
<div class="col-3 text-center"><sup>3</sup>Agency for Defense Development</div>
</div>
<div style="clear: both">
<div class="paper-btn-parent">
<a class="supp-btn"
href="https://openaccess.thecvf.com/content/CVPR2022/html/Suryanto_DTA_Physical_Camouflage_Attacks_Using_Differentiable_Transformation_Network_CVPR_2022_paper.html">
<span class="material-icons"> description </span>
Paper
</a>
<a class="supp-btn" href="assets/bib.txt">
<span class="material-icons"> description </span>
BibTeX
</a>
</div>
</div>
<div class="announcement">
<p style="animation: blinker 2s ease-in-out infinite;">
Follow-up work is available: <a
href="https://islab-ai.github.io/active-iccv2023/">ACTIVE-ICCV2023</a>
</p>
</div>
</div>
<section id="teaser">
<figure style="width: 100%;">
<a href="assets/DTA_attack_pipeline.png">
<img width="100%" src="assets/DTA_attack_pipeline.png" alt="Overview diagram of the DTA attack pipeline">
</a>
<p class="caption" style="margin-bottom: 1px;">
<b>Differentiable Transformation Attack (DTA) </b> is our proposed framework for generating a robust
physical adversarial pattern on a target object to camouflage it against object detection models
under a wide range of transformations.
Our framework uses legacy photo-realistic renderers for simulating physical-world transformations
and employs our novel <b>Differentiable Transformation Network (DTN)</b> to enable texture
differentiability. Our framework produces a robust adversarial texture as a repeated pattern
applicable and transferable even in the real world.
<!-- DTN is trained to learn the expected transformation of a rendered object when the texture is
changed while retaining the target object's original properties. -->
</p>
</figure>
</section>
<section id="abstract">
<h2>Abstract</h2>
<hr>
<p>
To perform adversarial attacks in the physical world, many studies have proposed adversarial camouflage,
a method to hide a target object by applying camouflage patterns on 3D object surfaces.
For obtaining optimal physical adversarial camouflage, previous studies have utilized the so-called
neural renderer, as it supports differentiability. However, existing neural renderers cannot fully
represent various real-world transformations due to a lack of control of scene parameters compared to
the legacy photo-realistic renderers. In this paper, we propose the Differentiable Transformation Attack
(DTA), a framework for generating a robust physical adversarial pattern on a target object to camouflage
it against object detection models with a wide range of transformations. It utilizes our novel
Differentiable Transformation Network (DTN), which learns the expected transformation of a rendered
object when the texture is changed while preserving the original properties of the target object. Using
our attack framework, an adversary can gain both the advantages of the legacy photo-realistic renderers
including various physical-world transformations and the benefit of white-box access by offering
differentiability. Our experiments show that our camouflaged 3D vehicles can successfully evade
state-of-the-art object detection models in the photo-realistic environment (i.e., <a
href="https://carla.org/">CARLA</a> on <a href="https://www.unrealengine.com/en-US/">Unreal
Engine</a>). Furthermore, our demonstration on a scaled Tesla Model 3 proves the applicability and
transferability of our method to the real world.
</p>
<table>
<thead>
<tr>
<th align="center">Photo-Realistic Simulation Demo</th>
<th align="center">Real World Demo</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">
<video class="centered" width="95%" muted loop autoplay>
<source src="assets/video/dta_photo_realistic_demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</td>
<td align="center">
<video class="centered" width="95%" muted loop autoplay>
<source src="assets/video/dta_real_world_demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</td>
</tr>
</tbody>
</table>
</section>
<hr>
<section id="results">
<h2>DTA Framework</h2>
<hr>
<p>
As shown in the top picture, <b>DTA Framework</b> consists of four components: <b>Photo-Realistic
Rendering Engine</b>, <b>Repeated Texture Projection Function</b>, <b>Differentiable Transformation
Network (DTN)</b>, and the target <b>Object Detection Model</b>.
<!-- DTA Framework is a practical approach that utilizes gradient-based optimization to find a robust physical
adversarial pattern on legacy photo-realistic rendering, which is not always differentiable. -->
</p>
<h3>Photo-Realistic Rendering Engine</h3>
<hr>
<div class="flex-row">
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/Photo-Realistic Rendering Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
<div style="width: 30%;">
<p> <b>Photo-realistic rendering engine</b> is any software that can produce a photo-realistic image
which is similar to the <b>real physical world</b>. In our work, we use <a
href="https://carla.org/2020/12/22/release-0.9.11/">
Carla Simulator (ver. 0.9.11)</a> on <a href="https://www.unrealengine.com/en-US/">Unreal
Engine (ver. 4.2)</a> to
synthesize our dataset as well as to evaluate our generated texture in a photo-realistic
simulation setting. We modified the original code to allow modification of the car's texture.
The video illustrates the output of the rendering engine we use.
</p>
</div>
</div>
<h3>Differentiable Transformation Network (DTN)</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/dtn_architecture.png">
<img width="100%" src="assets/dtn_architecture.png" alt="DTN architecture diagram">
</a>
<p class="caption" style="margin-bottom: 1px;">
Our proposed <b>DTN learns the expected transformation of a rendered object when the texture is
changed</b> while preserving the original properties of the target object. It relies on the
photo-realistic image synthesized from a non-differentiable renderer to produce a differentiable
version of the reference image after applying the expected texture. DTN is embedded as an extension
to provide texture differentiability.
</p>
</figure>
<div class="flex-row">
<div style="width: 30%;">
<br>
<br>
<br>
<p>The video illustrates how our DTN can correctly predict the rendered image when the texture
(color) is changed. The network retains the original target properties such as material, light
reflection, and shadow from other objects.
</p>
</div>
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/DTN Color Rendering Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
</div>
<h3>Repeated Texture Projection Function</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/repeated_texture_projection_function.png">
<img width="75%" class="center" src="assets/repeated_texture_projection_function.png">
</a>
<p class="caption" style="margin-bottom: 1px;">
We propose a <b>repeated pattern</b> as our final attack camouflage texture. It has several
benefits, such as ease of application because the texture can be used to cover the object while
ignoring the texture mapping. As the application, we propose a <b>Repeated Texture Projection
Function</b> for simply projecting the pattern with a sequence of operations by
<b>transformation matrix <i>M</i></b>. We use wrap mode for filling points outside boundaries, which
extends the output by wrapping around the opposite edge, giving a repeated texture effect.
</p>
</figure>
<div class="flex-row">
<div style="width: 30%;">
<br>
<br>
<p>The video illustrates how our DTN + Repeated Texture Projection Function can be used to mimic the
repeated pattern produced by the photo-realistic rendering engine. This gives us the
differentiable version of the photo-realistic renderer, allowing us to use gradient-based
optimization to find the optimum repeated attack texture.
</p>
</div>
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/DTN + Repeated Texture Projection Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
</div>
<h2>Framework Procedure</h2>
<hr>
<h3>DTN Model Training</h3>
<hr>
<div class="flex-row">
<div style="width: 30%;">
<br>
<p>
Before using DTA to generate the adversarial pattern, we need to train DTN with the dataset
generated by the photo-realistic rendering engine (see the first video on how the dataset is
generated). First, we select a set of random flat color textures and predefined transformations.
Then, we use the rendering engine to produce the photo-realistic images that will later be used
as reference image <i>x<sub>ref</sub></i> , expected texture <i>η<sub>exp</sub></i> , and
ground truth of rendered image <i>x<sub>ren</sub></i> . See the figure on the right for DTN
training diagram.
</p>
</div>
<figure style="width: 70%;">
<a href="assets/dtn_training_process.png">
<img width="90%" class="center" src="assets/dtn_training_process.png">
</a>
</figure>
</div>
<figure style="width: 100%;">
<p>
The training histories of DTN with DenseNet
architecture and the prediction samples are shown below.
</p>
<a href="assets/dtn_densenet_4_2_histories.png">
<img width="100%" class="center" src="assets/dtn_densenet_4_2_histories.png">
</a>
<a href="assets/dtn_prediction_demo.png">
<img width="100%" class="center" src="assets/dtn_prediction_demo.png">
</a>
</figure>
<h3>DTA Attacking Phase</h3>
<hr>
<div class="flex-row">
<div style="width: 50%;">
<br>
<p>
In the attack phase, the goal is to minimize the original target confidence score, which
prevents the object detector from detecting the target object correctly. We use the
differentiability of the complete DTA Framework to find the best adversarial pattern
<i>η<sub>adv</sub></i> that minimizes the attack loss <i>L<sub>atk</sub></i> by updating the
<i>η<sub>adv</sub></i> based on the loss gradient. The right figure shows the training history
for DTA targeting EfficientDetD0 model of Toyota Camry car.
</p>
</div>
<figure style="width: 50%;">
<a href="assets/camry_effdetd0_attack_histories.png">
<img width="100%" class="center" src="assets/camry_effdetd0_attack_histories.png">
</a>
</figure>
</div>
<figure style="width: 100%;">
<p>
The sample predictions of the DTA Framework consisting of standard, random (initial), and attack
(final) textured cars are shown below. As we can see, the random textured car is not sufficient to camouflage
the car from the object detection model.
</p>
<a href="assets/dta_prediction_demo.png">
<img width="100%" class="center" src="assets/dta_prediction_demo.png">
</a>
</figure>
<!-- <figure style="width: 100%;">
<p class="caption" style="margin-bottom: 1px;">
In the attack phase, the goal is to minimize the original target confidence score, which
prevents the object detector from detecting the target object correctly. We can use the
differentiability of the complete DTA Framework to find the best adversarial pattern
<i>η<sub>adv</sub></i> that minimizes the attack loss <i>L<sub>atk</sub></i> by updating the
<i>η<sub>adv</sub></i> based on the loss gradient.
</p>
</figure> -->
<h2>Evaluation Results</h2>
<hr>
<p>
We compare our adversarial camouflage with a random pattern and previous works on 3D physical attacks:
<a href="https://openreview.net/forum?id=SJgEl3A5tm">CAMOU</a>,
<a href="https://arxiv.org/abs/2007.16118">ER</a>,
<a
href="https://openaccess.thecvf.com/content_CVPR_2020/html/Huang_Universal_Physical_Camouflage_Attacks_on_Object_Detectors_CVPR_2020_paper.html">UPC</a>,
and
<a
href="https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Dual_Attention_Suppression_Attack_Generate_Adversarial_Camouflage_in_Physical_World_CVPR_2021_paper.html">DAS</a>.
We closely follow the approach to replicate the original papers, but we rebuild the environment and
target models based on our evaluation setup (see supplementary material for the details). However,
<a
href="https://openaccess.thecvf.com/content_CVPR_2020/html/Huang_Universal_Physical_Camouflage_Attacks_on_Object_Detectors_CVPR_2020_paper.html">UPC</a>
and
<a
href="https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Dual_Attention_Suppression_Attack_Generate_Adversarial_Camouflage_in_Physical_World_CVPR_2021_paper.html">DAS</a>
have different settings to recreate in our environment; thus, we only evaluate them on the
transferability experiment. Finally, we evaluate the transferability and applicability of our camouflage
pattern in the real-world setting. We built two 1:10 scaled Tesla Model 3 using a 3D printer, each
representing the normal and our camouflage texture. Then, we evaluate them in real-life locations,
indoor and outdoor.
</p>
<h3>Photo-Realistic Simulation Evaluation</h3>
<hr>
<p style="text-align: center;">[Click the link to show sample demo videos]</p>
<h4><a href="evaluation/effdetd0_5d_15p.html">Target: EfficientDetD0 | Camera: 5m Distance, 15° Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_10d_15p.html">Target: EfficientDetD0 | Camera: 10m Distance, 15° Pitch,
360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_10d_30p.html">Target: EfficientDetD0 | Camera: 10m Distance, 30°
Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_15d_30p.html">Target: EfficientDetD0 | Camera: 15m Distance, 30°
Pitch, 360°
Rotation</a></h4>
<h3>Transferability Evaluation</h3>
<hr>
<p style="text-align: center;">[Click the link to show sample demo videos]</p>
<h4><a href="evaluation/ssd_5d_0p.html">Target: SSD | Camera: 5m Distance, 0° Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/fasterrcnn_10d_15p.html">Target: Faster-RCNN | Camera: 10m Distance, 15° Pitch,
360°
Rotation</a></h4>
<h4><a href="evaluation/maskrcnn_10d_30p.html">Target: Mask-RCNN | Camera: 10m Distance, 30° Pitch,
360°
Rotation</a></h4>
<h3>Real-World Evaluation</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/RealWorld4x4.png">
<img width="100%" src="assets/RealWorld4x4.png" alt="Grid of real-world evaluation photos comparing normal and camouflaged scaled car models">
</a>
<p style="margin-bottom: 1px;">
</p>
</figure>
</section>
<section id="bibtex">
<h2>Citation</h2>
<hr>
<pre><code>
@InProceedings{Suryanto_2022_CVPR,
author = {Suryanto, Naufal and Kim, Yongsu and Kang, Hyoeun and Larasati, Harashta Tatimma and
Yun, Youngyeo and Le, Thi-Thu-Huong and Yang, Hunmin and Oh, Se-Yoon and Kim, Howon},
title = {DTA: Physical Camouflage Attacks Using Differentiable Transformation Network},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {15305-15314}
}
</code></pre>
</section>
<br />
<section id="paper">
<h2>Paper</h2>
<hr>
<div class="flex-row">
<div style="box-sizing: border-box; padding: 16px; margin: auto;">
<a href="assets/paper_preview.png"><img class="screenshot" src="assets/paper_preview.png"></a>
</div>
<div style="width: 50%">
<p><b>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</b></p>
<p>
Naufal Suryanto, Yongsu Kim, Hyoeun Kang, Harashta Tatimma Larasati,
Youngyeo Yun, Thi-Thu-Huong Le, Hunmin Yang, Se-Yoon Oh, Howon Kim
</p>
<div><span class="material-icons"> description </span><a
href="https://openaccess.thecvf.com/content/CVPR2022/papers/Suryanto_DTA_Physical_Camouflage_Attacks_Using_Differentiable_Transformation_Network_CVPR_2022_paper.pdf">
Paper</a>
</div>
<div><span class="material-icons"> description </span><a
href="https://openaccess.thecvf.com/content/CVPR2022/supplemental/Suryanto_DTA_Physical_Camouflage_CVPR_2022_supplemental.pdf"
rel="modal:open">
Supplementary</a></div>
<div><span class="material-icons"> description </span><a href="https://arxiv.org/abs/2203.09831">
arXiv</a>
</div>
<div><span class="material-icons"> description </span><a href="assets/bib.txt"> BibTeX</a>
</div>
</div>
</div>
</section>
</div>
<script src="script.js"></script>
</body>
</html>