Skip to content

Commit

Permalink
Site updated: 2023-10-07 13:49:36
Browse files Browse the repository at this point in the history
  • Loading branch information
cxzlw committed Oct 7, 2023
1 parent b1bd559 commit 969924f
Show file tree
Hide file tree
Showing 76 changed files with 250 additions and 391 deletions.
25 changes: 4 additions & 21 deletions 2023/07/05/trace-of-line-01/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<meta property="og:description" content="不存在的历史,勾起了石月仙的注意,随着找寻真相的深入,一个隐瞒了全世界的阴谋浮出水面。而对于人类,阴谋何来,何从,何去?">
<meta property="og:locale" content="zh_CN">
<meta property="article:published_time" content="2023-07-05T14:20:06.000Z">
<meta property="article:modified_time" content="2023-10-06T11:54:48.871Z">
<meta property="article:modified_time" content="2023-10-07T05:47:21.732Z">
<meta property="article:author" content="cxzlw">
<meta property="article:tag" content="飞石">
<meta property="article:tag" content="小说">
Expand All @@ -34,7 +34,7 @@



<link rel="manifest" href="/manifest.json"> <script type="module">import {Workbox} from "https://registry.npmmirror.com/workbox-window/7.0.0/files/build/workbox-window.prod.mjs";if ("serviceWorker" in navigator){const wb=new Workbox("/sw.js");wb.register();}</script>
<link rel="manifest" href="/manifest.json"> <script async type="module">import {Workbox} from "https://registry.npmmirror.com/workbox-window/7.0.0/files/build/workbox-window.prod.mjs";if ("serviceWorker" in navigator){const wb=new Workbox("/sw.js");wb.register();}</script>

<title>《飞石》Pt.1 未掷出的石子 第一章:不存在的历史 - 创新者.老王的博客</title>

Expand All @@ -46,8 +46,6 @@

<link rel="stylesheet" href="https://lib.baomitu.com/hint.css/2.7.0/hint.min.css" />

<link rel="stylesheet" href="https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.css" />



<!-- 主题依赖的图标库,不要自行修改 -->
Expand All @@ -74,7 +72,7 @@
<script id="fluid-configs">
var Fluid = window.Fluid || {};
Fluid.ctx = Object.assign({}, Fluid.ctx)
var CONFIG = {"hostname":"blog.cxzlw.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":""},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":true,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"/img/loading.gif","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
var CONFIG = {"hostname":"blog.cxzlw.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":""},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":false,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"/img/loading.gif","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};

if (CONFIG.web_analytics.follow_dnt) {
var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
Expand Down Expand Up @@ -581,9 +579,6 @@ <h2 id="第一章:不存在的历史"><a href="#第一章:不存在的历史









Expand Down Expand Up @@ -782,25 +777,13 @@ <h4 class="modal-title w-100 font-weight-bold">搜索</h4>
</script>



<script>
Fluid.utils.createScript('https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.js', function() {
Fluid.plugins.fancyBox();
});
</script>


<script>Fluid.plugins.imageCaption();</script>

<script src="/js/local-search.js" ></script>





<script src="/js/progressbar_done.js"></script>



<!-- 主题的启动项,将它保持在最底部 -->
<!-- the boot of the theme, keep it at the bottom -->
Expand All @@ -814,5 +797,5 @@ <h4 class="modal-title w-100 font-weight-bold">搜索</h4>
<noscript>
<div class="noscript-warning">博客在允许 JavaScript 运行的环境下浏览效果更佳</div>
</noscript>
</body>
<!-- hexo injector body_end start --><script async src="/js/image-ng.js"></script><script async src="/js/progressbar-done.js"></script><!-- hexo injector body_end end --></body>
</html>
71 changes: 45 additions & 26 deletions 2023/07/05/zhihu-aac-old/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://blog.cxzlw.top/img/image.png">
<meta property="article:published_time" content="2023-07-04T17:49:31.000Z">
<meta property="article:modified_time" content="2023-10-06T11:54:48.871Z">
<meta property="article:modified_time" content="2023-10-07T05:47:21.732Z">
<meta property="article:author" content="cxzlw">
<meta property="article:tag" content="Python">
<meta property="article:tag" content="cxzlw">
Expand All @@ -36,7 +36,7 @@



<link rel="manifest" href="/manifest.json"> <script type="module">import {Workbox} from "https://registry.npmmirror.com/workbox-window/7.0.0/files/build/workbox-window.prod.mjs";if ("serviceWorker" in navigator){const wb=new Workbox("/sw.js");wb.register();}</script>
<link rel="manifest" href="/manifest.json"> <script async type="module">import {Workbox} from "https://registry.npmmirror.com/workbox-window/7.0.0/files/build/workbox-window.prod.mjs";if ("serviceWorker" in navigator){const wb=new Workbox("/sw.js");wb.register();}</script>

<title>聊聊知乎盐选反爬 (回答页篇) - 创新者.老王的博客</title>

Expand All @@ -48,8 +48,6 @@

<link rel="stylesheet" href="https://lib.baomitu.com/hint.css/2.7.0/hint.min.css" />

<link rel="stylesheet" href="https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.css" />



<!-- 主题依赖的图标库,不要自行修改 -->
Expand All @@ -76,7 +74,7 @@
<script id="fluid-configs">
var Fluid = window.Fluid || {};
Fluid.ctx = Object.assign({}, Fluid.ctx)
var CONFIG = {"hostname":"blog.cxzlw.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":""},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":true,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"/img/loading.gif","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
var CONFIG = {"hostname":"blog.cxzlw.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":""},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":false,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"/img/loading.gif","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};

if (CONFIG.web_analytics.follow_dnt) {
var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
Expand Down Expand Up @@ -304,7 +302,13 @@ <h1 id="seo-header">聊聊知乎盐选反爬 (回答页篇)</h1>

<p>最近,知乎上线了针对专栏<sup id="fnref:1" class="footnote-ref"><a href="#fn:1" rel="footnote"><span class="hint--top hint--rounded" aria-label="专栏反爬现已更新,故本文只以回答反爬为演示。">[1]</span></a></sup>中盐选文章的反爬系统,随后该系统也被运用在知乎回答页面中的盐选文章上。具体表现为爬取的文章内容中出现大量的错乱词汇。而在本篇文章中,我们将一步步带领各位解开这些乱码。在这个过程中,我们将对字体反爬有更深入的认识,并学到运用字体反爬时需要注意的问题。</p>
<h2 id="一、知乎反爬效果"><a href="#一、知乎反爬效果" class="headerlink" title="一、知乎反爬效果"></a>一、知乎反爬效果</h2><p>来自知乎回答<a target="_blank" rel="noopener" href="https://www.zhihu.com/question/41922324/answer/3073556909">不被爱是一种什么样的感受? - 知乎</a></p>
<p><img src="/../img/image.png" srcset="/img/loading.gif" lazyload alt="乱码示意图"> </p>
<p>
<picture>
<source lazyload-data data-srcset="/../img/image.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image.png.webp" type="image/webp">
<img src="/../img/image.png" srcset="/img/loading.gif" lazyload alt="乱码示意图">
</picture>
</p>
<p>如图所示,在页面源码中出现了大量乱码,例如(原字,错字):<sup id="fnref:2" class="footnote-ref"><a href="#fn:2" rel="footnote"><span class="hint--top hint--rounded" aria-label="由于知乎回答页反爬使用了两套字体,故本文所有截图,代码运行结果等内容可能与实际不符。你可以选择以实际为主或刷新页面直到页面显示的内容与本文一致。">[2]</span></a></sup></p>
<ul>
<li>中 -&gt; 在</li>
Expand All @@ -314,17 +318,47 @@ <h2 id="一、知乎反爬效果"><a href="#一、知乎反爬效果" class="hea
<p>这些乱码使得文章可读性大大下降,那么乱码是怎么产生的?又如何解决这个问题呢?</p>
<h2 id="二、找寻乱码真凶"><a href="#二、找寻乱码真凶" class="headerlink" title="二、找寻乱码真凶"></a>二、找寻乱码真凶</h2><p>观察上述现象,页面源码中的字,在被显示到页面后,居然变成了正确的字。因此我们初步推断知乎在该页面运用了字体反爬。</p>
<p>接下来我们打开 F12 -&gt; Network 页面,选择 Font,观察知乎加载的字体。</p>
<p><img src="/../img/image-1.png" srcset="/img/loading.gif" lazyload alt="知乎加载的字体"></p>
<p>
<picture>
<source lazyload-data data-srcset="/../img/image-1.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image-1.png.webp" type="image/webp">
<img src="/../img/image-1.png" srcset="/img/loading.gif" lazyload alt="知乎加载的字体">
</picture>
</p>
<p>右键选择 Open in new tab 将字体保存下来。</p>
<p><img src="/../img/image-2.png" srcset="/img/loading.gif" lazyload alt="下载的字体文件"></p>
<p>
<picture>
<source lazyload-data data-srcset="/../img/image-2.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image-2.png.webp" type="image/webp">
<img src="/../img/image-2.png" srcset="/img/loading.gif" lazyload alt="下载的字体文件">
</picture>
</p>
<p>将字体后缀名改为 .ttf <sup id="fnref:3" class="footnote-ref"><a href="#fn:3" rel="footnote"><span class="hint--top hint--rounded" aria-label=".ttf 是因为 `data:font/ttf;...` 代表该字体是 ttf 格式的。">[3]</span></a></sup> 并打开。</p>
<div class="group-image-container"><div class="group-image-row"><div class="group-image-wrap"><img src="/../img/image-3.png" srcset="/img/loading.gif" lazyload alt="正常字体"></div><div class="group-image-wrap"><img src="/../img/image-4.png" srcset="/img/loading.gif" lazyload alt="反爬字体"></div></div></div>
<div class="group-image-container"><div class="group-image-row"><div class="group-image-wrap">
<picture>
<source lazyload-data data-srcset="/../img/image-3.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image-3.png.webp" type="image/webp">
<img src="/../img/image-3.png" srcset="/img/loading.gif" lazyload alt="正常字体">
</picture>
</div><div class="group-image-wrap">
<picture>
<source lazyload-data data-srcset="/../img/image-4.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image-4.png.webp" type="image/webp">
<img src="/../img/image-4.png" srcset="/img/loading.gif" lazyload alt="反爬字体">
</picture>
</div></div></div>
<figcaption aria-hidden="true" class="image-caption">左:正常字体 右:反爬字体</figcaption>

<p>与正常字体对比,我们下载的字体明显替换了部分字体,这便是知乎用于反爬的字体了。接下来我们将分析这个字体并给出应对方案。</p>
<h2 id="三、致命缺陷"><a href="#三、致命缺陷" class="headerlink" title="三、致命缺陷"></a>三、致命缺陷</h2><p>字体反爬的根本原理是替换原本的字为一个新字,再用字体将新字渲染为原字,这样对程序而言就只见到新字而不是旧字了,而用户看到的还是原本的内容。因此只要找到新字与原字间的对应关系便可解决该反爬。而要找到这个对应关系,抓住字体中各个字形的特征是必不可少的一环。</p>
<p>我们打开 <a target="_blank" rel="noopener" href="https://fontdrop.info/">FontDrop!</a> 加载字体,向下翻,观察字形的特征。</p>
<p><img src="/../img/image-5.png" srcset="/img/loading.gif" lazyload alt="字体中的字形"></p>
<p>
<picture>
<source lazyload-data data-srcset="/../img/image-5.png.avif" type="image/avif">
<source lazyload-data data-srcset="/../img/image-5.png.webp" type="image/webp">
<img src="/../img/image-5.png" srcset="/img/loading.gif" lazyload alt="字体中的字形">
</picture>
</p>
<p>我们发现字形的 Glyph 为 uni662F 而 Unicode 为 65F6,接下来我们试着查询这两个十六进制数对应的字:</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs python">glyph = <span class="hljs-string">&quot;\u662F&quot;</span><br>unicode = <span class="hljs-string">&quot;\u65F6&quot;</span><br><span class="hljs-built_in">print</span>(glyph, unicode)<br><span class="hljs-comment"># output: 是 时</span><br></code></pre></td></tr></table></figure>

Expand Down Expand Up @@ -535,9 +569,6 @@ <h2 id="注"><a href="#注" class="headerlink" title="注"></a>注</h2><section









Expand Down Expand Up @@ -736,25 +767,13 @@ <h4 class="modal-title w-100 font-weight-bold">搜索</h4>
</script>



<script>
Fluid.utils.createScript('https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.js', function() {
Fluid.plugins.fancyBox();
});
</script>


<script>Fluid.plugins.imageCaption();</script>

<script src="/js/local-search.js" ></script>





<script src="/js/progressbar_done.js"></script>



<!-- 主题的启动项,将它保持在最底部 -->
<!-- the boot of the theme, keep it at the bottom -->
Expand All @@ -768,5 +787,5 @@ <h4 class="modal-title w-100 font-weight-bold">搜索</h4>
<noscript>
<div class="noscript-warning">博客在允许 JavaScript 运行的环境下浏览效果更佳</div>
</noscript>
</body>
<!-- hexo injector body_end start --><script async src="/js/image-ng.js"></script><script async src="/js/progressbar-done.js"></script><!-- hexo injector body_end end --></body>
</html>
Loading

0 comments on commit 969924f

Please sign in to comment.