-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
480 lines (430 loc) · 25.4 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
<!DOCTYPE html>
<html lang="en">
<!-- Legacy Google AJAX Libraries loader, now served over HTTPS so the request
     is not blocked as mixed content when this page is hosted on HTTPS
     (e.g. GitHub Pages).
     NOTE(review): jQuery 3.x is also loaded below from cdnjs, so this 1.3.2
     load is almost certainly redundant; confirm nothing depends on it before
     removing the loader entirely. -->
<script src="https://www.google.com/jsapi"></script>
<script>
google.load("jquery", "1.3.2");
</script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-5KQQ6EHLPE"></script>
<script>
// Google Analytics 4 (gtag.js) bootstrap: commands are pushed onto the
// dataLayer queue and consumed by the async gtag.js script loaded above.
window.dataLayer = window.dataLayer || [];
function gtag() { dataLayer.push(arguments); }
gtag('js', new Date()); // record the page-load timestamp
gtag('config', 'G-5KQQ6EHLPE'); // initialize this GA4 measurement ID
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.0.0/jquery.min.js"></script>
<!-- jQuery Modal -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-modal/0.9.1/jquery.modal.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/jquery-modal/0.9.1/jquery.modal.min.css" />
<div class="topnav" id="myTopnav">
<a href="http://infosec.pusan.ac.kr/"><img width="100%" src="assets/infosec_logo.png" alt="Information Security Lab, Pusan National University"></a>
<a href="https://www.smartm2m.co.kr/"><img width="100%" src="assets/smartm2m_blk_logo.png" alt="SmartM2M"></a>
<a href="https://add.re.kr/"><img width="100%" src="assets/add_logo.png" alt="Agency for Defense Development"></a>
</div>
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic'
rel='stylesheet' type='text/css'>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</title>
<meta property="og:description"
content="DTA: Physical Camouflage Attacks using Differentiable Transformation Network" />
<link href="https://fonts.googleapis.com/css2?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="style.css">
</head>
<body>
<div class="container">
<div id="avs" class="modal">
<p>Available soon...</p>
</div>
<div class="paper-title">
<h1>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</h1>
<h2>CVPR 2022</h2>
</div>
<div id="authors">
<div class="author-row">
<div class="col-3 text-center"><a href="https://www.linkedin.com/in/naufal-suryanto/">Naufal
Suryanto</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=DXb797cAAAAJ">Yongsu
Kim</a><sup>1,2</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=GeQi_D4AAAAJ">
Hyoeun Kang</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.id/citations?user=S8lwCEUAAAAJ">
Harashta Tatimma Larasati</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.com/citations?user=dFZRNOEAAAAJ">
Youngyeo Yun</a><sup>1</sup>
</div>
<div class="col-3 text-center"><a href="https://scholar.google.com/citations?user=UptzPYsAAAAJ">
Thi-Thu-Huong Le</a><sup>1</sup></div>
<div class="col-3 text-center"><a href="https://scholar.google.co.kr/citations?user=mDxJj2AAAAAJ">Hunmin
Yang</a><sup>3</sup></div>
<div class="col-3 text-center"><a href="https://ieeexplore.ieee.org/author/37088566336">Se-Yoon
Oh</a><sup>3</sup></div>
<div class="col-3 text-center"><a href="https://ieeexplore.ieee.org/author/37082911200">Howon
Kim</a><sup>1,2</sup></div>
</div>
<div class="affil-row">
<div class="col-3 text-center"><sup>1</sup>Pusan National University</div>
<div class="col-3 text-center"><sup>2</sup>SmartM2M</div>
<div class="col-3 text-center"><sup>3</sup>Agency for Defense Development</div>
</div>
<div style="clear: both">
<div class="paper-btn-parent">
<a class="supp-btn"
href="https://openaccess.thecvf.com/content/CVPR2022/html/Suryanto_DTA_Physical_Camouflage_Attacks_Using_Differentiable_Transformation_Network_CVPR_2022_paper.html">
<span class="material-icons"> description </span>
Paper
</a>
<a class="supp-btn" href="assets/bib.txt">
<span class="material-icons"> description </span>
BibTeX
</a>
</div>
</div>
<div class="announcement">
<p style="animation: blinker 2s ease-in-out infinite;">
Follow-up work is available: <a
href="https://islab-ai.github.io/active-iccv2023/">ACTIVE-ICCV2023</a>
</p>
</div>
</div>
<section id="teaser">
<figure style="width: 100%;">
<a href="assets/DTA_attack_pipeline.png">
<img width="100%" src="assets/DTA_attack_pipeline.png" alt="Overview diagram of the DTA attack pipeline">
</a>
<p class="caption" style="margin-bottom: 1px;">
<b>Differentiable Transformation Attack (DTA) </b> is our proposed framework for generating a robust
physical adversarial pattern on a target object to camouflage it against object detection models
under a wide range of transformations.
Our framework uses legacy photo-realistic renderers for simulating physical-world transformations
and employs our novel <b>Differentiable Transformation Network (DTN)</b> to enable texture
differentiability. Our framework produces a robust adversarial texture as a repeated pattern
applicable and transferable even in the real world.
<!-- DTN is trained to learn the expected transformation of a rendered object when the texture is
changed while retaining the target object's original properties. -->
</p>
</figure>
</section>
<section id="abstract">
<h2>Abstract</h2>
<hr>
<p>
To perform adversarial attacks in the physical world, many studies have proposed adversarial camouflage,
a method to hide a target object by applying camouflage patterns on 3D object surfaces.
For obtaining optimal physical adversarial camouflage, previous studies have utilized the so-called
neural renderer, as it supports differentiability. However, existing neural renderers cannot fully
represent various real-world transformations due to a lack of control of scene parameters compared to
the legacy photo-realistic renderers. In this paper, we propose the Differentiable Transformation Attack
(DTA), a framework for generating a robust physical adversarial pattern on a target object to camouflage
it against object detection models with a wide range of transformations. It utilizes our novel
Differentiable Transformation Network (DTN), which learns the expected transformation of a rendered
object when the texture is changed while preserving the original properties of the target object. Using
our attack framework, an adversary can gain both the advantages of the legacy photo-realistic renderers
including various physical-world transformations and the benefit of white-box access by offering
differentiability. Our experiments show that our camouflaged 3D vehicles can successfully evade
state-of-the-art object detection models in the photo-realistic environment (i.e., <a
href="https://carla.org/">CARLA</a> on <a href="https://www.unrealengine.com/en-US/">Unreal
Engine</a>). Furthermore, our demonstration on a scaled Tesla Model 3 proves the applicability and
transferability of our method to the real world.
</p>
<table>
<thead>
<tr>
<th align="center">Photo-Realistic Simulation Demo</th>
<th align="center">Real World Demo</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">
<video class="centered" width="95%" muted loop autoplay>
<source src="assets/video/dta_photo_realistic_demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</td>
<td align="center">
<video class="centered" width="95%" muted loop autoplay>
<source src="assets/video/dta_real_world_demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</td>
</tr>
</tbody>
</table>
</section>
<hr>
<section id="results">
<h2>DTA Framework</h2>
<hr>
<p>
As shown in the top picture, <b>DTA Framework</b> consists of four components: <b>Photo-Realistic
Rendering Engine</b>, <b>Repeated Texture Projection Function</b>, <b>Differentiable Transformation
Network (DTN)</b>, and the target <b>Object Detection Model</b>.
<!-- DTA Framework is a practical approach that utilizes gradient-based optimization to find a robust physical
adversarial pattern on legacy photo-realistic rendering, which is not always differentiable. -->
</p>
<h3>Photo-Realistic Rendering Engine</h3>
<hr>
<div class="flex-row">
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/Photo-Realistic Rendering Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
<div style="width: 30%;">
<p> <b>Photo-realistic rendering engine</b> is any software that can produce a photo-realistic image
which is similar to the <b>real physical world</b>. In our work, we use <a
href="https://carla.org/2020/12/22/release-0.9.11/">
Carla Simulator (ver. 0.9.11)</a> on <a href="https://www.unrealengine.com/en-US/">Unreal
Engine (ver. 4.2)</a> to
synthesize our dataset as well as to evaluate our generated texture in a photo-realistic
simulation setting. We modified the original code to allow modification of the car's texture.
The video illustrates the output of the rendering engine we use.
</p>
</div>
</div>
<h3>Differentiable Transformation Network (DTN)</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/dtn_architecture.png">
<img width="100%" src="assets/dtn_architecture.png" alt="DTN architecture diagram">
</a>
<p class="caption" style="margin-bottom: 1px;">
Our proposed <b>DTN learns the expected transformation of a rendered object when the texture is
changed</b> while preserving the original properties of the target object. It relies on the
photo-realistic image synthesized from a non-differentiable renderer to produce a differentiable
version of the reference image after applying the expected texture. DTN is embedded as an extension
to provide texture differentiability.
</p>
</figure>
<div class="flex-row">
<div style="width: 30%;">
<br>
<br>
<br>
<p>The video illustrates how our DTN can correctly predict the rendered image when the texture
(color) is changed. The network retains the original target properties such as material, light
reflection, and shadow from other objects.
</p>
</div>
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/DTN Color Rendering Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
</div>
<h3>Repeated Texture Projection Function</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/repeated_texture_projection_function.png">
<img width="75%" class="center" src="assets/repeated_texture_projection_function.png">
</a>
<p class="caption" style="margin-bottom: 1px;">
We propose a <b>repeated pattern</b> as our final attack camouflage texture. It has several
benefits, such as ease of application because the texture can be used to cover the object while
ignoring the texture mapping. As the application, we propose a <b>Repeated Texture Projection
Function</b> for simply projecting the pattern with a sequence of operations by
<b>transformation matrix <i>M</i></b>. We use wrap mode for filling points outside boundaries, which
extends the output by wrapping around the opposite edge, giving a repeated texture effect.
</p>
</figure>
<div class="flex-row">
<div style="width: 30%;">
<br>
<br>
<p>The video illustrates how our DTN + Repeated Texture Projection Function can be used to mimic the
repeated pattern produced by the photo-realistic rendering engine. This gives us the
differentiable version of the photo-realistic renderer, allowing us to use gradient-based
optimization to find the optimum repeated attack texture.
</p>
</div>
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video/DTN + Repeated Texture Projection Demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>
</figure>
</div>
<h2>Framework Procedure</h2>
<hr>
<h3>DTN Model Training</h3>
<hr>
<div class="flex-row">
<div style="width: 30%;">
<br>
<p>
Before using DTA to generate the adversarial pattern, we need to train DTN with the dataset
generated by the photo-realistic rendering engine (see the first video on how the dataset is
generated). First, we select a set of random flat color textures and predefined transformations.
Then, we use the rendering engine to produce the photo-realistic images that will later be used
as reference image <i>x<sub>ref</sub></i> , expected texture <i>η<sub>exp</sub></i> , and
ground truth of rendered image <i>x<sub>ren</sub></i> . See the figure on the right for DTN
training diagram.
</p>
</div>
<figure style="width: 70%;">
<a href="assets/dtn_training_process.png">
<img width="90%" class="center" src="assets/dtn_training_process.png">
</a>
</figure>
</div>
<figure style="width: 100%;">
<p>
The training histories of DTN with DenseNet
architecture and the prediction samples are shown below.
</p>
<a href="assets/dtn_densenet_4_2_histories.png">
<img width="100%" class="center" src="assets/dtn_densenet_4_2_histories.png">
</a>
<a href="assets/dtn_prediction_demo.png">
<img width="100%" class="center" src="assets/dtn_prediction_demo.png">
</a>
</figure>
<h3>DTA Attacking Phase</h3>
<hr>
<div class="flex-row">
<div style="width: 50%;">
<br>
<p>
In the attack phase, the goal is to minimize the original target confidence score, which
prevents the object detector from detecting the target object correctly. We use the
differentiability of the complete DTA Framework to find the best adversarial pattern
<i>η<sub>adv</sub></i> that minimizes the attack loss <i>L<sub>atk</sub></i> by updating the
<i>η<sub>adv</sub></i> based on the loss gradient. The right figure shows the training history
for DTA targeting EfficientDetD0 model of Toyota Camry car.
</p>
</div>
<figure style="width: 50%;">
<a href="assets/camry_effdetd0_attack_histories.png">
<img width="100%" class="center" src="assets/camry_effdetd0_attack_histories.png">
</a>
</figure>
</div>
<figure style="width: 100%;">
<p>
The sample predictions of the DTA Framework consisting of standard, random (initial), and attack
(final) textured cars are shown below. As we can see, the random textured car is not sufficient to camouflage
the car from the object detection model.
</p>
<a href="assets/dta_prediction_demo.png">
<img width="100%" class="center" src="assets/dta_prediction_demo.png">
</a>
</figure>
<!-- <figure style="width: 100%;">
<p class="caption" style="margin-bottom: 1px;">
In the attack phase, the goal is to minimize the original target confidence score, which
prevents the object detector from detecting the target object correctly. We can use the
differentiability of the complete DTA Framework to find the best adversarial pattern
<i>η<sub>adv</sub></i> that minimizes the attack loss <i>L<sub>atk</sub></i> by updating the
<i>η<sub>adv</sub></i> based on the loss gradient.
</p>
</figure> -->
<h2>Evaluation Results</h2>
<hr>
<p>
We compare our adversarial camouflage with a random pattern and previous works on 3D physical attacks:
<a href="https://openreview.net/forum?id=SJgEl3A5tm">CAMOU</a>,
<a href="https://arxiv.org/abs/2007.16118">ER</a>,
<a
href="https://openaccess.thecvf.com/content_CVPR_2020/html/Huang_Universal_Physical_Camouflage_Attacks_on_Object_Detectors_CVPR_2020_paper.html">UPC</a>,
and
<a
href="https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Dual_Attention_Suppression_Attack_Generate_Adversarial_Camouflage_in_Physical_World_CVPR_2021_paper.html">DAS</a>.
We closely follow the approach to replicate the original papers, but we rebuild the environment and
target models based on our evaluation setup (see supplementary material for the details). However,
<a
href="https://openaccess.thecvf.com/content_CVPR_2020/html/Huang_Universal_Physical_Camouflage_Attacks_on_Object_Detectors_CVPR_2020_paper.html">UPC</a>
and
<a
href="https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Dual_Attention_Suppression_Attack_Generate_Adversarial_Camouflage_in_Physical_World_CVPR_2021_paper.html">DAS</a>
have different settings to recreate in our environment; thus, we only evaluate them on the
transferability experiment. Finally, we evaluate the transferability and applicability of our camouflage
pattern in the real-world setting. We built two 1:10 scaled Tesla Model 3 using a 3D printer, each
representing the normal and our camouflage texture. Then, we evaluate them in real-life locations,
indoor and outdoor.
</p>
<h3>Photo-Realistic Simulation Evaluation</h3>
<hr>
<p style="text-align: center;">[Click the link to show sample demo videos]</p>
<h4><a href="evaluation/effdetd0_5d_15p.html">Target: EfficientDetD0 | Camera: 5m Distance, 15° Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_10d_15p.html">Target: EfficientDetD0 | Camera: 10m Distance, 15° Pitch,
360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_10d_30p.html">Target: EfficientDetD0 | Camera: 10m Distance, 30°
Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/effdetd0_15d_30p.html">Target: EfficientDetD0 | Camera: 15m Distance, 30°
Pitch, 360°
Rotation</a></h4>
<h3>Transferability Evaluation</h3>
<hr>
<p style="text-align: center;">[Click the link to show sample demo videos]</p>
<h4><a href="evaluation/ssd_5d_0p.html">Target: SSD | Camera: 5m Distance, 0° Pitch, 360°
Rotation</a></h4>
<h4><a href="evaluation/fasterrcnn_10d_15p.html">Target: Faster-RCNN | Camera: 10m Distance, 15° Pitch,
360°
Rotation</a></h4>
<h4><a href="evaluation/maskrcnn_10d_30p.html">Target: Mask-RCNN | Camera: 10m Distance, 30° Pitch,
360°
Rotation</a></h4>
<h3>Real-World Evaluation</h3>
<hr>
<figure style="width: 100%;">
<a href="assets/RealWorld4x4.png">
<img width="100%" src="assets/RealWorld4x4.png" alt="Grid of real-world evaluation photos comparing normal and camouflaged scaled car models">
</a>
<p style="margin-bottom: 1px;">
</p>
</figure>
</section>
<section id="bibtex">
<h2>Citation</h2>
<hr>
<pre><code>
@InProceedings{Suryanto_2022_CVPR,
author = {Suryanto, Naufal and Kim, Yongsu and Kang, Hyoeun and Larasati, Harashta Tatimma and
Yun, Youngyeo and Le, Thi-Thu-Huong and Yang, Hunmin and Oh, Se-Yoon and Kim, Howon},
title = {DTA: Physical Camouflage Attacks Using Differentiable Transformation Network},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {15305-15314}
}
</code></pre>
</section>
<br />
<section id="paper">
<h2>Paper</h2>
<hr>
<div class="flex-row">
<div style="box-sizing: border-box; padding: 16px; margin: auto;">
<a href="assets/paper_preview.png"><img class="screenshot" src="assets/paper_preview.png"></a>
</div>
<div style="width: 50%">
<p><b>DTA: Physical Camouflage Attacks using Differentiable Transformation Network</b></p>
<p>
Naufal Suryanto, Yongsu Kim, Hyoeun Kang, Harashta Tatimma Larasati,
Youngyeo Yun, Thi-Thu-Huong Le, Hunmin Yang, Se-Yoon Oh, Howon Kim
</p>
<div><span class="material-icons"> description </span><a
href="https://openaccess.thecvf.com/content/CVPR2022/papers/Suryanto_DTA_Physical_Camouflage_Attacks_Using_Differentiable_Transformation_Network_CVPR_2022_paper.pdf">
Paper</a>
</div>
<div><span class="material-icons"> description </span><a
href="https://openaccess.thecvf.com/content/CVPR2022/supplemental/Suryanto_DTA_Physical_Camouflage_CVPR_2022_supplemental.pdf"
rel="modal:open">
Supplementary</a></div>
<div><span class="material-icons"> description </span><a href="https://arxiv.org/abs/2203.09831">
arXiv</a>
</div>
<div><span class="material-icons"> description </span><a href="assets/bib.txt"> BibTeX</a>
</div>
</div>
</div>
</section>
</div>
<script src="script.js"></script>
</body>
</html>