content update (#2791)

intel · Apr 19, 2024 · e1f7a89 · e1f7a89
1 parent a274599
commit e1f7a89
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 23 deletions.
diff --git a/llm/llama3/xpu/_sources/index.md.txt b/llm/llama3/xpu/_sources/index.md.txt
@@ -4,7 +4,7 @@ Intel® Extension for PyTorch* provides dedicated optimization for running Llama
 
 # 1. Environment Setup
 
-## 1.1 Conda-based environment setup with pre-built wheels on Windows 11 Home
+## 1.1 Conda-based environment setup with pre-built wheels on Windows 11
 
 ```bash
 # Install Visual Studio 2022
@@ -28,7 +28,7 @@ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
 pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0a0%2Bgit04048c2-cp39-cp39-win_amd64.whl
 
 # Install Intel® Extension for PyTorch*
-pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.1.30%2Bgit03c5535-cp39-cp39-win_amd64.whl
+pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.1.30%2Bgit6661060-cp39-cp39-win_amd64.whl
 
 # Install Intel® Extension for Transformers*
 git clone https://github.com/intel/intel-extension-for-transformers.git intel-extension-for-transformers -b xpu_lm_head 
@@ -107,7 +107,7 @@ python run_generation_gpu_woq.py \
 
 The int4 model is saved in folder ~/llama3_all_int4.
 
-### 2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11 Home
+### 2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11
 
 - Command:
 ```bash
@@ -116,13 +116,19 @@ python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --benchmark
 *Note:* replace ${PATH/TO/MODEL} with the actual local path of the Llama 3 INT4 model
 ```
 
-### 2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11 Home
+### 2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11
 
 - Command:
 ```bash
 set LLM_ACC_TEST=1 
+python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --accuracy --task "openbookqa"
 python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --accuracy --task "piqa"
+python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --accuracy --task "rte"
+python run_generation_gpu_woq_for_llama.py --model ${PATH/TO/MODEL} --accuracy --task "truthfulqa_mc1"
+
 *Note:* replace ${PATH/TO/MODEL} with the actual local path of the Llama 3 INT4 model
+*Note:* you may validate the Llama 3 WOQ INT4 accuracy with any single task listed above (for example, only the first command, which uses "openbookqa"),
+or with all of them, depending on your needs. Expect a longer total run time when executing more than one task.
 ```
 
 ## Miscellaneous Tips

diff --git a/llm/llama3/xpu/_static/documentation_options.js b/llm/llama3/xpu/_static/documentation_options.js
@@ -10,4 +10,4 @@ const DOCUMENTATION_OPTIONS = {
     NAVIGATION_WITH_KEYS: false,
     SHOW_SEARCH_SUMMARY: true,
     ENABLE_SEARCH_SHORTCUTS: true,
-};
+};
diff --git a/llm/llama3/xpu/genindex.html b/llm/llama3/xpu/genindex.html
@@ -95,7 +95,7 @@ <h1 id="index">Index</h1>
   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-   <jinja2.runtime.BlockReference object at 0x7f1deff8ce80> 
+   <jinja2.runtime.BlockReference object at 0x7fcd27da17f0> 
 <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a> <a href="/#" data-wap_ref="dns" id="wap_dns"><small>| Your Privacy Choices</small></a> <a href=https://www.intel.com/content/www/us/en/privacy/privacy-residents-certain-states.html data-wap_ref="nac" id="wap_nac"><small>| Notice at Collection</small></a> </div> <p></p> <div>&copy; Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), <a href='http://opensource.org/licenses/0BSD'>http://opensource.org/licenses/0BSD</a>. </div>
 
 
@@ -111,4 +111,4 @@ <h1 id="index">Index</h1>
   </script> 
 
 </body>
-</html>
+</html>
diff --git a/llm/llama3/xpu/index.html b/llm/llama3/xpu/index.html
@@ -54,14 +54,14 @@
               <div class="local-toc"><ul>
 <li><a class="reference internal" href="#">Intel® Extension for PyTorch* Large Language Model (LLM) Feature Get Started For Llama 3 models</a></li>
 <li><a class="reference internal" href="#environment-setup">1. Environment Setup</a><ul>
-<li><a class="reference internal" href="#conda-based-environment-setup-with-pre-built-wheels-on-windows-11-home">1.1 Conda-based environment setup with pre-built wheels on Windows 11 Home</a></li>
+<li><a class="reference internal" href="#conda-based-environment-setup-with-pre-built-wheels-on-windows-11">1.1 Conda-based environment setup with pre-built wheels on Windows 11</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#how-to-run-llama-3">2. How To Run Llama 3</a><ul>
 <li><a class="reference internal" href="#usage-of-running-llama-3-models">2.1 Usage of running Llama 3 models</a><ul>
 <li><a class="reference internal" href="#int4-woq-model">2.1.1 INT4 WOQ Model</a></li>
-<li><a class="reference internal" href="#measure-llama-3-woq-int4-performance-on-windows-11-home">2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11 Home</a></li>
-<li><a class="reference internal" href="#validate-llama-3-woq-int4-accuracy-on-windows-11-home">2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11 Home</a></li>
+<li><a class="reference internal" href="#measure-llama-3-woq-int4-performance-on-windows-11">2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11</a></li>
+<li><a class="reference internal" href="#validate-llama-3-woq-int4-accuracy-on-windows-11">2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#miscellaneous-tips">Miscellaneous Tips</a></li>
@@ -99,8 +99,8 @@ <h1>Intel® Extension for PyTorch* Large Language Model (LLM) Feature Get Starte
 </section>
 <section id="environment-setup">
 <h1>1. Environment Setup<a class="headerlink" href="#environment-setup" title="Link to this heading"></a></h1>
-<section id="conda-based-environment-setup-with-pre-built-wheels-on-windows-11-home">
-<h2>1.1 Conda-based environment setup with pre-built wheels on Windows 11 Home<a class="headerlink" href="#conda-based-environment-setup-with-pre-built-wheels-on-windows-11-home" title="Link to this heading"></a></h2>
+<section id="conda-based-environment-setup-with-pre-built-wheels-on-windows-11">
+<h2>1.1 Conda-based environment setup with pre-built wheels on Windows 11<a class="headerlink" href="#conda-based-environment-setup-with-pre-built-wheels-on-windows-11" title="Link to this heading"></a></h2>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># Install Visual Studio 2022</span>
 https://visualstudio.microsoft.com/zh-hans/thank-you-downloading-visual-studio/?sku<span class="o">=</span>Community<span class="p">&amp;</span><span class="nv">channel</span><span class="o">=</span>Release<span class="p">&amp;</span><span class="nv">version</span><span class="o">=</span>VS2022<span class="p">&amp;</span><span class="nv">source</span><span class="o">=</span>VSLandingPage<span class="p">&amp;</span><span class="nv">cid</span><span class="o">=</span><span class="m">2030</span><span class="p">&amp;</span><span class="nv">passive</span><span class="o">=</span><span class="nb">false</span>
 
@@ -122,7 +122,7 @@ <h2>1.1 Conda-based environment setup with pre-built wheels on Windows 11 Home<a
 pip<span class="w"> </span>install<span class="w"> </span>https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0a0%2Bgit04048c2-cp39-cp39-win_amd64.whl
 
 <span class="c1"># Install Intel® Extension for PyTorch*</span>
-pip<span class="w"> </span>install<span class="w"> </span>https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.1.30%2Bgit03c5535-cp39-cp39-win_amd64.whl
+pip<span class="w"> </span>install<span class="w"> </span>https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.1.30%2Bgit6661060-cp39-cp39-win_amd64.whl
 
 <span class="c1"># Install Intel® Extension for Transformers*</span>
 git<span class="w"> </span>clone<span class="w"> </span>https://github.com/intel/intel-extension-for-transformers.git<span class="w"> </span>intel-extension-for-transformers<span class="w"> </span>-b<span class="w"> </span>xpu_lm_head<span class="w"> </span>
@@ -221,8 +221,8 @@ <h3>2.1.1 INT4 WOQ Model<a class="headerlink" href="#int4-woq-model" title="Link
 </div>
 <p>The int4 model is saved in folder ~/llama3_all_int4.</p>
 </section>
-<section id="measure-llama-3-woq-int4-performance-on-windows-11-home">
-<h3>2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11 Home<a class="headerlink" href="#measure-llama-3-woq-int4-performance-on-windows-11-home" title="Link to this heading"></a></h3>
+<section id="measure-llama-3-woq-int4-performance-on-windows-11">
+<h3>2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11<a class="headerlink" href="#measure-llama-3-woq-int4-performance-on-windows-11" title="Link to this heading"></a></h3>
 <ul class="simple">
 <li><p>Command:</p></li>
 </ul>
@@ -232,14 +232,20 @@ <h3>2.1.2 Measure Llama 3 WOQ INT4 Performance on Windows 11 Home<a class="heade
 </pre></div>
 </div>
 </section>
-<section id="validate-llama-3-woq-int4-accuracy-on-windows-11-home">
-<h3>2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11 Home<a class="headerlink" href="#validate-llama-3-woq-int4-accuracy-on-windows-11-home" title="Link to this heading"></a></h3>
+<section id="validate-llama-3-woq-int4-accuracy-on-windows-11">
+<h3>2.1.3 Validate Llama 3 WOQ INT4 Accuracy on Windows 11<a class="headerlink" href="#validate-llama-3-woq-int4-accuracy-on-windows-11" title="Link to this heading"></a></h3>
 <ul class="simple">
 <li><p>Command:</p></li>
 </ul>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">set</span><span class="w"> </span><span class="nv">LLM_ACC_TEST</span><span class="o">=</span><span class="m">1</span><span class="w"> </span>
+python<span class="w"> </span>run_generation_gpu_woq_for_llama.py<span class="w"> </span>--model<span class="w"> </span><span class="si">${</span><span class="nv">PATH</span><span class="p">/TO/MODEL</span><span class="si">}</span><span class="w"> </span>--accuracy<span class="w"> </span>--task<span class="w"> </span><span class="s2">&quot;openbookqa&quot;</span>
 python<span class="w"> </span>run_generation_gpu_woq_for_llama.py<span class="w"> </span>--model<span class="w"> </span><span class="si">${</span><span class="nv">PATH</span><span class="p">/TO/MODEL</span><span class="si">}</span><span class="w"> </span>--accuracy<span class="w"> </span>--task<span class="w"> </span><span class="s2">&quot;piqa&quot;</span>
+python<span class="w"> </span>run_generation_gpu_woq_for_llama.py<span class="w"> </span>--model<span class="w"> </span><span class="si">${</span><span class="nv">PATH</span><span class="p">/TO/MODEL</span><span class="si">}</span><span class="w"> </span>--accuracy<span class="w"> </span>--task<span class="w"> </span><span class="s2">&quot;rte&quot;</span>
+python<span class="w"> </span>run_generation_gpu_woq_for_llama.py<span class="w"> </span>--model<span class="w"> </span><span class="si">${</span><span class="nv">PATH</span><span class="p">/TO/MODEL</span><span class="si">}</span><span class="w"> </span>--accuracy<span class="w"> </span>--task<span class="w"> </span><span class="s2">&quot;truthfulqa_mc1&quot;</span>
+
 *Note:*<span class="w"> </span>replace<span class="w"> </span><span class="si">${</span><span class="nv">PATH</span><span class="p">/TO/MODEL</span><span class="si">}</span><span class="w"> </span>with<span class="w"> </span>actual<span class="w"> </span>Llama<span class="w"> </span><span class="m">3</span><span class="w"> </span>INT4<span class="w"> </span>model<span class="w"> </span><span class="nb">local</span><span class="w"> </span>path
+*Note:*<span class="w"> </span>you<span class="w"> </span>may<span class="w"> </span>validate<span class="w"> </span>the<span class="w"> </span>Llama<span class="w"> </span><span class="m">3</span><span class="w"> </span>WOQ<span class="w"> </span>INT4<span class="w"> </span>accuracy<span class="w"> </span>using<span class="w"> </span>any<span class="w"> </span>task<span class="w"> </span>listed<span class="w"> </span>above,<span class="w"> </span>such<span class="w"> </span>as<span class="w"> </span>the<span class="w"> </span>first<span class="w"> </span><span class="nb">command</span><span class="w"> </span>with<span class="w"> </span><span class="s2">&quot;openbookqa&quot;</span><span class="w"> </span>only,
+or<span class="w"> </span>validate<span class="w"> </span>all<span class="w"> </span>of<span class="w"> </span>them,<span class="w"> </span>depending<span class="w"> </span>on<span class="w"> </span>your<span class="w"> </span>needs.<span class="w"> </span>Please<span class="w"> </span>expect<span class="w"> </span>more<span class="w"> </span><span class="nb">time</span><span class="w"> </span>needed<span class="w"> </span><span class="k">for</span><span class="w"> </span>executing<span class="w"> </span>more<span class="w"> </span>than<span class="w"> </span>one<span class="w"> </span>task.
 </pre></div>
 </div>
 </section>
@@ -264,7 +270,7 @@ <h2>Miscellaneous Tips<a class="headerlink" href="#miscellaneous-tips" title="Li
   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-   <jinja2.runtime.BlockReference object at 0x7f1deffaf130> 
+   <jinja2.runtime.BlockReference object at 0x7fcd27dcb7c0> 
 <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a> <a href="/#" data-wap_ref="dns" id="wap_dns"><small>| Your Privacy Choices</small></a> <a href=https://www.intel.com/content/www/us/en/privacy/privacy-residents-certain-states.html data-wap_ref="nac" id="wap_nac"><small>| Notice at Collection</small></a> </div> <p></p> <div>&copy; Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), <a href='http://opensource.org/licenses/0BSD'>http://opensource.org/licenses/0BSD</a>. </div>
 
 
@@ -280,4 +286,4 @@ <h2>Miscellaneous Tips<a class="headerlink" href="#miscellaneous-tips" title="Li
   </script> 
 
 </body>
-</html>
+</html>
diff --git a/llm/llama3/xpu/objects.inv b/llm/llama3/xpu/objects.inv
@@ -3,4 +3,4 @@
 # Version: 2.1.30+xpu
 # The remainder of this file is compressed using zlib.
 xڅ��
-�0��{�U� ޼;6��k�]+m�K�>���@������d[�h���`�J��y�m�3p���cS��L�y>�80��:7�/���r���Zݣ�S��*ϋ5d��{�1T��IA&ln�C�C7yC"�G������C.����_�����:J�%w���^ʇ�X7r�
+�0��{�U� ޼;6��k�]+m�K�>���@������d[�h���`�J��y�m�3p���cS��L�y>�80��:7�/���r���Zݣ�S��*ϋ5d��{�1T��IA&ln�C�C7yC"�G������C.����_�����:J�%w���^ʇ�X7r�
diff --git a/llm/llama3/xpu/search.html b/llm/llama3/xpu/search.html
@@ -103,7 +103,7 @@
   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.
-   <jinja2.runtime.BlockReference object at 0x7f1deff4c610> 
+   <jinja2.runtime.BlockReference object at 0x7fcd29e91ee0> 
 <p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a> <a href="/#" data-wap_ref="dns" id="wap_dns"><small>| Your Privacy Choices</small></a> <a href=https://www.intel.com/content/www/us/en/privacy/privacy-residents-certain-states.html data-wap_ref="nac" id="wap_nac"><small>| Notice at Collection</small></a> </div> <p></p> <div>&copy; Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), <a href='http://opensource.org/licenses/0BSD'>http://opensource.org/licenses/0BSD</a>. </div>
 
 
@@ -126,4 +126,4 @@
 
 
 </body>
-</html>
+</html>